From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- arch/powerpc/platforms/40x/Kconfig | 78 + arch/powerpc/platforms/40x/Makefile | 2 + arch/powerpc/platforms/40x/ppc40x_simple.c | 77 + arch/powerpc/platforms/44x/44x.h | 12 + arch/powerpc/platforms/44x/Kconfig | 319 +++ arch/powerpc/platforms/44x/Makefile | 14 + arch/powerpc/platforms/44x/canyonlands.c | 117 + arch/powerpc/platforms/44x/ebony.c | 61 + arch/powerpc/platforms/44x/fsp2.c | 316 ++ arch/powerpc/platforms/44x/fsp2.h | 272 ++ arch/powerpc/platforms/44x/idle.c | 54 + arch/powerpc/platforms/44x/iss4xx.c | 150 + arch/powerpc/platforms/44x/machine_check.c | 87 + arch/powerpc/platforms/44x/misc_44x.S | 41 + arch/powerpc/platforms/44x/ppc44x_simple.c | 85 + arch/powerpc/platforms/44x/ppc476.c | 290 ++ arch/powerpc/platforms/44x/ppc476_modules.lds | 15 + arch/powerpc/platforms/44x/sam440ep.c | 69 + arch/powerpc/platforms/44x/warp.c | 329 +++ arch/powerpc/platforms/4xx/Makefile | 7 + arch/powerpc/platforms/4xx/cpm.c | 332 +++ arch/powerpc/platforms/4xx/gpio.c | 195 ++ arch/powerpc/platforms/4xx/hsta_msi.c | 208 ++ arch/powerpc/platforms/4xx/machine_check.c | 23 + arch/powerpc/platforms/4xx/pci.c | 2182 ++++++++++++++ arch/powerpc/platforms/4xx/pci.h | 505 ++++ arch/powerpc/platforms/4xx/soc.c | 218 ++ arch/powerpc/platforms/4xx/uic.c | 331 +++ arch/powerpc/platforms/512x/Kconfig | 42 + arch/powerpc/platforms/512x/Makefile | 10 + arch/powerpc/platforms/512x/clock-commonclk.c | 1224 ++++++++ arch/powerpc/platforms/512x/mpc5121_ads.c | 71 + arch/powerpc/platforms/512x/mpc5121_ads.h | 12 + arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 198 ++ arch/powerpc/platforms/512x/mpc512x.h | 18 + arch/powerpc/platforms/512x/mpc512x_generic.c | 51 + arch/powerpc/platforms/512x/mpc512x_lpbfifo.c | 518 ++++ arch/powerpc/platforms/512x/mpc512x_shared.c | 506 ++++ arch/powerpc/platforms/512x/pdm360ng.c | 126 + arch/powerpc/platforms/52xx/Kconfig | 56 + arch/powerpc/platforms/52xx/Makefile | 16 + arch/powerpc/platforms/52xx/efika.c | 233 ++ arch/powerpc/platforms/52xx/lite5200.c | 192 ++ arch/powerpc/platforms/52xx/lite5200_pm.c | 249 ++ arch/powerpc/platforms/52xx/lite5200_sleep.S | 422 +++ arch/powerpc/platforms/52xx/media5200.c | 239 ++ arch/powerpc/platforms/52xx/mpc5200_simple.c | 79 + arch/powerpc/platforms/52xx/mpc52xx_common.c | 306 ++ arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 780 +++++ arch/powerpc/platforms/52xx/mpc52xx_pci.c | 419 +++ arch/powerpc/platforms/52xx/mpc52xx_pic.c | 519 ++++ arch/powerpc/platforms/52xx/mpc52xx_pm.c | 206 ++ arch/powerpc/platforms/52xx/mpc52xx_sleep.S | 155 + arch/powerpc/platforms/82xx/Kconfig | 28 + arch/powerpc/platforms/82xx/Makefile | 7 + arch/powerpc/platforms/82xx/ep8248e.c | 307 ++ arch/powerpc/platforms/82xx/km82xx.c | 199 ++ arch/powerpc/platforms/82xx/pq2.c | 34 + arch/powerpc/platforms/82xx/pq2.h | 21 + arch/powerpc/platforms/83xx/Kconfig | 92 + arch/powerpc/platforms/83xx/Makefile | 18 + arch/powerpc/platforms/83xx/asp834x.c | 45 + arch/powerpc/platforms/83xx/km83xx.c | 188 ++ arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 224 ++ arch/powerpc/platforms/83xx/misc.c | 149 + arch/powerpc/platforms/83xx/mpc830x_rdb.c | 57 + arch/powerpc/platforms/83xx/mpc831x_rdb.c | 57 + arch/powerpc/platforms/83xx/mpc832x_rdb.c | 227 ++ arch/powerpc/platforms/83xx/mpc834x_itx.c | 70 + arch/powerpc/platforms/83xx/mpc836x_rdk.c | 41 + arch/powerpc/platforms/83xx/mpc837x_rdb.c | 82 + arch/powerpc/platforms/83xx/mpc83xx.h | 83 + arch/powerpc/platforms/83xx/suspend-asm.S | 551 ++++ arch/powerpc/platforms/83xx/suspend.c | 431 +++ arch/powerpc/platforms/83xx/usb_831x.c | 128 + arch/powerpc/platforms/83xx/usb_834x.c | 90 + arch/powerpc/platforms/83xx/usb_837x.c | 58 + arch/powerpc/platforms/85xx/Kconfig | 291 ++ arch/powerpc/platforms/85xx/Makefile | 37 + arch/powerpc/platforms/85xx/bsc913x_qds.c | 63 + arch/powerpc/platforms/85xx/bsc913x_rdb.c | 50 + arch/powerpc/platforms/85xx/c293pcie.c | 54 + arch/powerpc/platforms/85xx/common.c | 106 + arch/powerpc/platforms/85xx/corenet_generic.c | 203 ++ arch/powerpc/platforms/85xx/ge_imp3a.c | 207 ++ arch/powerpc/platforms/85xx/ksi8560.c | 184 ++ arch/powerpc/platforms/85xx/mpc8536_ds.c | 66 + arch/powerpc/platforms/85xx/mpc85xx.h | 24 + arch/powerpc/platforms/85xx/mpc85xx_8259.c | 64 + arch/powerpc/platforms/85xx/mpc85xx_ds.c | 98 + arch/powerpc/platforms/85xx/mpc85xx_mds.c | 372 +++ arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 107 + arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 208 ++ arch/powerpc/platforms/85xx/mvme2500.c | 57 + arch/powerpc/platforms/85xx/p1010rdb.c | 77 + arch/powerpc/platforms/85xx/p1022_ds.c | 563 ++++ arch/powerpc/platforms/85xx/p1022_rdk.c | 143 + arch/powerpc/platforms/85xx/p1023_rdb.c | 107 + arch/powerpc/platforms/85xx/p2020.c | 81 + arch/powerpc/platforms/85xx/ppa8548.c | 83 + arch/powerpc/platforms/85xx/qemu_e500.c | 63 + arch/powerpc/platforms/85xx/sgy_cts1000.c | 156 + arch/powerpc/platforms/85xx/smp.c | 519 ++++ arch/powerpc/platforms/85xx/smp.h | 17 + arch/powerpc/platforms/85xx/socrates.c | 79 + arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 308 ++ arch/powerpc/platforms/85xx/socrates_fpga_pic.h | 11 + arch/powerpc/platforms/85xx/stx_gp3.c | 94 + arch/powerpc/platforms/85xx/t1042rdb_diu.c | 152 + arch/powerpc/platforms/85xx/tqm85xx.c | 131 + arch/powerpc/platforms/85xx/twr_p102x.c | 117 + arch/powerpc/platforms/85xx/xes_mpc85xx.c | 176 ++ arch/powerpc/platforms/86xx/Kconfig | 61 + arch/powerpc/platforms/86xx/Makefile | 11 + arch/powerpc/platforms/86xx/common.c | 43 + arch/powerpc/platforms/86xx/gef_ppc9a.c | 192 ++ arch/powerpc/platforms/86xx/gef_sbc310.c | 179 ++ arch/powerpc/platforms/86xx/gef_sbc610.c | 169 ++ arch/powerpc/platforms/86xx/mpc86xx.h | 19 + arch/powerpc/platforms/86xx/mpc86xx_smp.c | 118 + arch/powerpc/platforms/86xx/mvme7100.c | 114 + arch/powerpc/platforms/86xx/pic.c | 69 + arch/powerpc/platforms/8xx/Kconfig | 200 ++ arch/powerpc/platforms/8xx/Makefile | 12 + arch/powerpc/platforms/8xx/adder875.c | 105 + arch/powerpc/platforms/8xx/cpm1-ic.c | 188 ++ arch/powerpc/platforms/8xx/cpm1.c | 636 ++++ arch/powerpc/platforms/8xx/ep88xc.c | 170 ++ arch/powerpc/platforms/8xx/m8xx_setup.c | 172 ++ arch/powerpc/platforms/8xx/machine_check.c | 34 + arch/powerpc/platforms/8xx/mpc86xads.h | 47 + arch/powerpc/platforms/8xx/mpc86xads_setup.c | 145 + arch/powerpc/platforms/8xx/mpc885ads.h | 49 + arch/powerpc/platforms/8xx/mpc885ads_setup.c | 217 ++ arch/powerpc/platforms/8xx/mpc8xx.h | 20 + arch/powerpc/platforms/8xx/pic.c | 155 + arch/powerpc/platforms/8xx/pic.h | 19 + arch/powerpc/platforms/8xx/tqm8xx_setup.c | 148 + arch/powerpc/platforms/Kconfig | 307 ++ arch/powerpc/platforms/Kconfig.cputype | 646 +++++ arch/powerpc/platforms/Makefile | 26 + arch/powerpc/platforms/amigaone/Kconfig | 19 + arch/powerpc/platforms/amigaone/Makefile | 2 + arch/powerpc/platforms/amigaone/setup.c | 168 ++ arch/powerpc/platforms/book3s/Kconfig | 15 + arch/powerpc/platforms/book3s/Makefile | 2 + arch/powerpc/platforms/book3s/vas-api.c | 634 ++++ arch/powerpc/platforms/cell/Kconfig | 104 + arch/powerpc/platforms/cell/Makefile | 27 + arch/powerpc/platforms/cell/axon_msi.c | 481 ++++ arch/powerpc/platforms/cell/cbe_powerbutton.c | 105 + arch/powerpc/platforms/cell/cbe_regs.c | 298 ++ arch/powerpc/platforms/cell/cbe_thermal.c | 386 +++ arch/powerpc/platforms/cell/cell.h | 15 + arch/powerpc/platforms/cell/cpufreq_spudemand.c | 133 + arch/powerpc/platforms/cell/interrupt.c | 390 +++ arch/powerpc/platforms/cell/interrupt.h | 90 + arch/powerpc/platforms/cell/iommu.c | 1094 +++++++ arch/powerpc/platforms/cell/pervasive.c | 125 + arch/powerpc/platforms/cell/pervasive.h | 26 + arch/powerpc/platforms/cell/pmu.c | 412 +++ arch/powerpc/platforms/cell/ras.c | 352 +++ arch/powerpc/platforms/cell/ras.h | 13 + arch/powerpc/platforms/cell/setup.c | 274 ++ arch/powerpc/platforms/cell/smp.c | 161 ++ arch/powerpc/platforms/cell/spider-pci.c | 170 ++ arch/powerpc/platforms/cell/spider-pic.c | 344 +++ arch/powerpc/platforms/cell/spu_base.c | 790 +++++ arch/powerpc/platforms/cell/spu_callbacks.c | 64 + arch/powerpc/platforms/cell/spu_manage.c | 530 ++++ arch/powerpc/platforms/cell/spu_priv1_mmio.c | 167 ++ arch/powerpc/platforms/cell/spu_priv1_mmio.h | 14 + arch/powerpc/platforms/cell/spu_syscalls.c | 165 ++ arch/powerpc/platforms/cell/spufs/.gitignore | 3 + arch/powerpc/platforms/cell/spufs/Makefile | 63 + arch/powerpc/platforms/cell/spufs/backing_ops.c | 400 +++ arch/powerpc/platforms/cell/spufs/context.c | 175 ++ arch/powerpc/platforms/cell/spufs/coredump.c | 182 ++ arch/powerpc/platforms/cell/spufs/fault.c | 167 ++ arch/powerpc/platforms/cell/spufs/file.c | 2633 +++++++++++++++++ arch/powerpc/platforms/cell/spufs/gang.c | 74 + arch/powerpc/platforms/cell/spufs/hw_ops.c | 335 +++ arch/powerpc/platforms/cell/spufs/inode.c | 826 ++++++ arch/powerpc/platforms/cell/spufs/lscsa_alloc.c | 50 + arch/powerpc/platforms/cell/spufs/run.c | 451 +++ arch/powerpc/platforms/cell/spufs/sched.c | 1141 ++++++++ arch/powerpc/platforms/cell/spufs/spu_restore.c | 322 +++ .../platforms/cell/spufs/spu_restore_crt0.S | 102 + .../cell/spufs/spu_restore_dump.h_shipped | 935 ++++++ arch/powerpc/platforms/cell/spufs/spu_save.c | 181 ++ arch/powerpc/platforms/cell/spufs/spu_save_crt0.S | 88 + .../platforms/cell/spufs/spu_save_dump.h_shipped | 743 +++++ arch/powerpc/platforms/cell/spufs/spu_utils.h | 147 + arch/powerpc/platforms/cell/spufs/spufs.h | 356 +++ arch/powerpc/platforms/cell/spufs/sputrace.h | 41 + arch/powerpc/platforms/cell/spufs/switch.c | 2206 ++++++++++++++ arch/powerpc/platforms/cell/spufs/syscalls.c | 89 + arch/powerpc/platforms/chrp/Kconfig | 16 + arch/powerpc/platforms/chrp/Makefile | 4 + arch/powerpc/platforms/chrp/chrp.h | 11 + arch/powerpc/platforms/chrp/gg2.h | 61 + arch/powerpc/platforms/chrp/nvram.c | 95 + arch/powerpc/platforms/chrp/pci.c | 387 +++ arch/powerpc/platforms/chrp/pegasos_eth.c | 201 ++ arch/powerpc/platforms/chrp/setup.c | 586 ++++ arch/powerpc/platforms/chrp/smp.c | 54 + arch/powerpc/platforms/chrp/time.c | 159 + arch/powerpc/platforms/embedded6xx/Kconfig | 94 + arch/powerpc/platforms/embedded6xx/Makefile | 12 + arch/powerpc/platforms/embedded6xx/flipper-pic.c | 244 ++ arch/powerpc/platforms/embedded6xx/flipper-pic.h | 20 + arch/powerpc/platforms/embedded6xx/gamecube.c | 89 + arch/powerpc/platforms/embedded6xx/hlwd-pic.c | 235 ++ arch/powerpc/platforms/embedded6xx/hlwd-pic.h | 17 + arch/powerpc/platforms/embedded6xx/holly.c | 268 ++ arch/powerpc/platforms/embedded6xx/linkstation.c | 162 ++ arch/powerpc/platforms/embedded6xx/ls_uart.c | 147 + arch/powerpc/platforms/embedded6xx/mpc10x.h | 159 + arch/powerpc/platforms/embedded6xx/mvme5100.c | 208 ++ arch/powerpc/platforms/embedded6xx/storcenter.c | 121 + arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c | 306 ++ arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h | 27 + arch/powerpc/platforms/embedded6xx/wii.c | 180 ++ arch/powerpc/platforms/fsl_uli1575.c | 379 +++ arch/powerpc/platforms/maple/Kconfig | 19 + arch/powerpc/platforms/maple/Makefile | 2 + arch/powerpc/platforms/maple/maple.h | 15 + arch/powerpc/platforms/maple/pci.c | 672 +++++ arch/powerpc/platforms/maple/setup.c | 363 +++ arch/powerpc/platforms/maple/time.c | 170 ++ arch/powerpc/platforms/microwatt/Kconfig | 11 + arch/powerpc/platforms/microwatt/Makefile | 1 + arch/powerpc/platforms/microwatt/microwatt.h | 7 + arch/powerpc/platforms/microwatt/rng.c | 44 + arch/powerpc/platforms/microwatt/setup.c | 43 + arch/powerpc/platforms/pasemi/Kconfig | 51 + arch/powerpc/platforms/pasemi/Makefile | 4 + arch/powerpc/platforms/pasemi/dma_lib.c | 621 ++++ arch/powerpc/platforms/pasemi/gpio_mdio.c | 327 +++ arch/powerpc/platforms/pasemi/idle.c | 93 + arch/powerpc/platforms/pasemi/iommu.c | 267 ++ arch/powerpc/platforms/pasemi/misc.c | 87 + arch/powerpc/platforms/pasemi/msi.c | 162 ++ arch/powerpc/platforms/pasemi/pasemi.h | 36 + arch/powerpc/platforms/pasemi/pci.c | 294 ++ arch/powerpc/platforms/pasemi/powersave.S | 76 + arch/powerpc/platforms/pasemi/setup.c | 456 +++ arch/powerpc/platforms/pasemi/time.c | 18 + arch/powerpc/platforms/powermac/Kconfig | 34 + arch/powerpc/platforms/powermac/Makefile | 25 + arch/powerpc/platforms/powermac/backlight.c | 220 ++ arch/powerpc/platforms/powermac/bootx_init.c | 595 ++++ arch/powerpc/platforms/powermac/cache.S | 356 +++ arch/powerpc/platforms/powermac/feature.c | 3022 ++++++++++++++++++++ arch/powerpc/platforms/powermac/low_i2c.c | 1514 ++++++++++ arch/powerpc/platforms/powermac/nvram.c | 656 +++++ arch/powerpc/platforms/powermac/pci.c | 1261 ++++++++ arch/powerpc/platforms/powermac/pfunc_base.c | 412 +++ arch/powerpc/platforms/powermac/pfunc_core.c | 1022 +++++++ arch/powerpc/platforms/powermac/pic.c | 650 +++++ arch/powerpc/platforms/powermac/pmac.h | 47 + arch/powerpc/platforms/powermac/setup.c | 601 ++++ arch/powerpc/platforms/powermac/sleep.S | 433 +++ arch/powerpc/platforms/powermac/smp.c | 1025 +++++++ arch/powerpc/platforms/powermac/time.c | 243 ++ arch/powerpc/platforms/powermac/udbg_adb.c | 220 ++ arch/powerpc/platforms/powermac/udbg_scc.c | 184 ++ arch/powerpc/platforms/powernv/Kconfig | 38 + arch/powerpc/platforms/powernv/Makefile | 33 + arch/powerpc/platforms/powernv/copy-paste.h | 42 + arch/powerpc/platforms/powernv/eeh-powernv.c | 1696 +++++++++++ arch/powerpc/platforms/powernv/idle.c | 1507 ++++++++++ arch/powerpc/platforms/powernv/memtrace.c | 339 +++ arch/powerpc/platforms/powernv/ocxl.c | 598 ++++ arch/powerpc/platforms/powernv/opal-async.c | 290 ++ arch/powerpc/platforms/powernv/opal-call.c | 295 ++ arch/powerpc/platforms/powernv/opal-core.c | 663 +++++ arch/powerpc/platforms/powernv/opal-dump.c | 459 +++ arch/powerpc/platforms/powernv/opal-elog.c | 340 +++ arch/powerpc/platforms/powernv/opal-fadump.c | 726 +++++ arch/powerpc/platforms/powernv/opal-fadump.h | 146 + arch/powerpc/platforms/powernv/opal-flash.c | 566 ++++ arch/powerpc/platforms/powernv/opal-hmi.c | 381 +++ arch/powerpc/platforms/powernv/opal-imc.c | 324 +++ arch/powerpc/platforms/powernv/opal-irqchip.c | 312 ++ arch/powerpc/platforms/powernv/opal-kmsg.c | 47 + arch/powerpc/platforms/powernv/opal-lpc.c | 418 +++ .../powerpc/platforms/powernv/opal-memory-errors.c | 134 + arch/powerpc/platforms/powernv/opal-msglog.c | 161 ++ arch/powerpc/platforms/powernv/opal-nvram.c | 113 + arch/powerpc/platforms/powernv/opal-power.c | 174 ++ arch/powerpc/platforms/powernv/opal-powercap.c | 251 ++ arch/powerpc/platforms/powernv/opal-prd.c | 452 +++ arch/powerpc/platforms/powernv/opal-psr.c | 175 ++ arch/powerpc/platforms/powernv/opal-rtc.c | 84 + arch/powerpc/platforms/powernv/opal-secvar.c | 182 ++ .../powerpc/platforms/powernv/opal-sensor-groups.c | 240 ++ arch/powerpc/platforms/powernv/opal-sensor.c | 132 + arch/powerpc/platforms/powernv/opal-sysparam.c | 294 ++ arch/powerpc/platforms/powernv/opal-tracepoints.c | 87 + arch/powerpc/platforms/powernv/opal-wrappers.S | 63 + arch/powerpc/platforms/powernv/opal-xscom.c | 210 ++ arch/powerpc/platforms/powernv/opal.c | 1251 ++++++++ arch/powerpc/platforms/powernv/pci-cxl.c | 153 + arch/powerpc/platforms/powernv/pci-ioda-tce.c | 430 +++ arch/powerpc/platforms/powernv/pci-ioda.c | 2827 ++++++++++++++++++ arch/powerpc/platforms/powernv/pci-sriov.c | 760 +++++ arch/powerpc/platforms/powernv/pci.c | 862 ++++++ arch/powerpc/platforms/powernv/pci.h | 340 +++ arch/powerpc/platforms/powernv/powernv.h | 47 + arch/powerpc/platforms/powernv/rng.c | 200 ++ arch/powerpc/platforms/powernv/setup.c | 587 ++++ arch/powerpc/platforms/powernv/smp.c | 441 +++ arch/powerpc/platforms/powernv/subcore-asm.S | 91 + arch/powerpc/platforms/powernv/subcore.c | 449 +++ arch/powerpc/platforms/powernv/subcore.h | 21 + arch/powerpc/platforms/powernv/ultravisor.c | 70 + arch/powerpc/platforms/powernv/vas-debug.c | 168 ++ arch/powerpc/platforms/powernv/vas-fault.c | 245 ++ arch/powerpc/platforms/powernv/vas-trace.h | 113 + arch/powerpc/platforms/powernv/vas-window.c | 1471 ++++++++++ arch/powerpc/platforms/powernv/vas.c | 253 ++ arch/powerpc/platforms/powernv/vas.h | 501 ++++ arch/powerpc/platforms/ps3/Kconfig | 182 ++ arch/powerpc/platforms/ps3/Makefile | 9 + arch/powerpc/platforms/ps3/device-init.c | 975 +++++++ arch/powerpc/platforms/ps3/exports.c | 13 + arch/powerpc/platforms/ps3/gelic_udbg.c | 244 ++ arch/powerpc/platforms/ps3/htab.c | 195 ++ arch/powerpc/platforms/ps3/hvcall.S | 792 +++++ arch/powerpc/platforms/ps3/interrupt.c | 783 +++++ arch/powerpc/platforms/ps3/mm.c | 1254 ++++++++ arch/powerpc/platforms/ps3/os-area.c | 830 ++++++ arch/powerpc/platforms/ps3/platform.h | 253 ++ arch/powerpc/platforms/ps3/repository.c | 1380 +++++++++ arch/powerpc/platforms/ps3/setup.c | 305 ++ arch/powerpc/platforms/ps3/smp.c | 120 + arch/powerpc/platforms/ps3/spu.c | 619 ++++ arch/powerpc/platforms/ps3/system-bus.c | 803 ++++++ arch/powerpc/platforms/ps3/time.c | 59 + arch/powerpc/platforms/pseries/Kconfig | 186 ++ arch/powerpc/platforms/pseries/Makefile | 39 + arch/powerpc/platforms/pseries/cc_platform.c | 26 + arch/powerpc/platforms/pseries/cmm.c | 663 +++++ arch/powerpc/platforms/pseries/dlpar.c | 583 ++++ arch/powerpc/platforms/pseries/dtl.c | 445 +++ arch/powerpc/platforms/pseries/eeh_pseries.c | 887 ++++++ arch/powerpc/platforms/pseries/event_sources.c | 30 + arch/powerpc/platforms/pseries/firmware.c | 191 ++ arch/powerpc/platforms/pseries/hotplug-cpu.c | 901 ++++++ arch/powerpc/platforms/pseries/hotplug-memory.c | 923 ++++++ arch/powerpc/platforms/pseries/hvCall.S | 370 +++ arch/powerpc/platforms/pseries/hvCall_inst.c | 140 + arch/powerpc/platforms/pseries/hvconsole.c | 75 + arch/powerpc/platforms/pseries/hvcserver.c | 239 ++ arch/powerpc/platforms/pseries/ibmebus.c | 479 ++++ arch/powerpc/platforms/pseries/io_event_irq.c | 161 ++ arch/powerpc/platforms/pseries/iommu.c | 1742 +++++++++++ arch/powerpc/platforms/pseries/kexec.c | 71 + arch/powerpc/platforms/pseries/lpar.c | 2026 +++++++++++++ arch/powerpc/platforms/pseries/lparcfg.c | 802 ++++++ arch/powerpc/platforms/pseries/mobility.c | 830 ++++++ arch/powerpc/platforms/pseries/msi.c | 698 +++++ arch/powerpc/platforms/pseries/nvram.c | 241 ++ arch/powerpc/platforms/pseries/of_helpers.c | 97 + arch/powerpc/platforms/pseries/of_helpers.h | 9 + arch/powerpc/platforms/pseries/papr-sysparm.c | 151 + .../platforms/pseries/papr_platform_attributes.c | 362 +++ arch/powerpc/platforms/pseries/papr_scm.c | 1581 ++++++++++ arch/powerpc/platforms/pseries/pci.c | 322 +++ arch/powerpc/platforms/pseries/pci_dlpar.c | 111 + arch/powerpc/platforms/pseries/plpks-secvar.c | 217 ++ arch/powerpc/platforms/pseries/plpks.c | 711 +++++ arch/powerpc/platforms/pseries/pmem.c | 167 ++ arch/powerpc/platforms/pseries/power.c | 72 + arch/powerpc/platforms/pseries/pseries.h | 131 + arch/powerpc/platforms/pseries/pseries_energy.c | 368 +++ arch/powerpc/platforms/pseries/ras.c | 882 ++++++ arch/powerpc/platforms/pseries/reconfig.c | 414 +++ arch/powerpc/platforms/pseries/rng.c | 37 + arch/powerpc/platforms/pseries/rtas-fadump.c | 557 ++++ arch/powerpc/platforms/pseries/rtas-fadump.h | 114 + arch/powerpc/platforms/pseries/rtas-work-area.c | 209 ++ arch/powerpc/platforms/pseries/setup.c | 1162 ++++++++ arch/powerpc/platforms/pseries/smp.c | 282 ++ arch/powerpc/platforms/pseries/suspend.c | 189 ++ arch/powerpc/platforms/pseries/svm.c | 94 + arch/powerpc/platforms/pseries/vas-sysfs.c | 281 ++ arch/powerpc/platforms/pseries/vas.c | 1121 ++++++++ arch/powerpc/platforms/pseries/vas.h | 157 + arch/powerpc/platforms/pseries/vio.c | 1729 +++++++++++ arch/powerpc/platforms/pseries/vphn.c | 90 + 392 files changed, 121876 insertions(+) create mode 100644 arch/powerpc/platforms/40x/Kconfig create mode 100644 arch/powerpc/platforms/40x/Makefile create mode 100644 arch/powerpc/platforms/40x/ppc40x_simple.c create mode 100644 arch/powerpc/platforms/44x/44x.h create mode 100644 arch/powerpc/platforms/44x/Kconfig create mode 100644 arch/powerpc/platforms/44x/Makefile create mode 100644 arch/powerpc/platforms/44x/canyonlands.c create mode 100644 arch/powerpc/platforms/44x/ebony.c create mode 100644 arch/powerpc/platforms/44x/fsp2.c create mode 100644 arch/powerpc/platforms/44x/fsp2.h create mode 100644 arch/powerpc/platforms/44x/idle.c create mode 100644 arch/powerpc/platforms/44x/iss4xx.c create mode 100644 arch/powerpc/platforms/44x/machine_check.c create mode 100644 arch/powerpc/platforms/44x/misc_44x.S create mode 100644 arch/powerpc/platforms/44x/ppc44x_simple.c create mode 100644 arch/powerpc/platforms/44x/ppc476.c create mode 100644 arch/powerpc/platforms/44x/ppc476_modules.lds create mode 100644 arch/powerpc/platforms/44x/sam440ep.c create mode 100644 arch/powerpc/platforms/44x/warp.c create mode 100644 arch/powerpc/platforms/4xx/Makefile create mode 100644 arch/powerpc/platforms/4xx/cpm.c create mode 100644 arch/powerpc/platforms/4xx/gpio.c create mode 100644 arch/powerpc/platforms/4xx/hsta_msi.c create mode 100644 arch/powerpc/platforms/4xx/machine_check.c create mode 100644 arch/powerpc/platforms/4xx/pci.c create mode 100644 arch/powerpc/platforms/4xx/pci.h create mode 100644 arch/powerpc/platforms/4xx/soc.c create mode 100644 arch/powerpc/platforms/4xx/uic.c create mode 100644 arch/powerpc/platforms/512x/Kconfig create mode 100644 arch/powerpc/platforms/512x/Makefile create mode 100644 arch/powerpc/platforms/512x/clock-commonclk.c create mode 100644 arch/powerpc/platforms/512x/mpc5121_ads.c create mode 100644 arch/powerpc/platforms/512x/mpc5121_ads.h create mode 100644 arch/powerpc/platforms/512x/mpc5121_ads_cpld.c create mode 100644 arch/powerpc/platforms/512x/mpc512x.h create mode 100644 arch/powerpc/platforms/512x/mpc512x_generic.c create mode 100644 arch/powerpc/platforms/512x/mpc512x_lpbfifo.c create mode 100644 arch/powerpc/platforms/512x/mpc512x_shared.c create mode 100644 arch/powerpc/platforms/512x/pdm360ng.c create mode 100644 arch/powerpc/platforms/52xx/Kconfig create mode 100644 arch/powerpc/platforms/52xx/Makefile create mode 100644 arch/powerpc/platforms/52xx/efika.c create mode 100644 arch/powerpc/platforms/52xx/lite5200.c create mode 100644 arch/powerpc/platforms/52xx/lite5200_pm.c create mode 100644 arch/powerpc/platforms/52xx/lite5200_sleep.S create mode 100644 arch/powerpc/platforms/52xx/media5200.c create mode 100644 arch/powerpc/platforms/52xx/mpc5200_simple.c create mode 100644 arch/powerpc/platforms/52xx/mpc52xx_common.c create mode 100644 arch/powerpc/platforms/52xx/mpc52xx_gpt.c create mode 100644 arch/powerpc/platforms/52xx/mpc52xx_pci.c create mode 100644 arch/powerpc/platforms/52xx/mpc52xx_pic.c create mode 100644 arch/powerpc/platforms/52xx/mpc52xx_pm.c create mode 100644 arch/powerpc/platforms/52xx/mpc52xx_sleep.S create mode 100644 arch/powerpc/platforms/82xx/Kconfig create mode 100644 arch/powerpc/platforms/82xx/Makefile create mode 100644 arch/powerpc/platforms/82xx/ep8248e.c create mode 100644 arch/powerpc/platforms/82xx/km82xx.c create mode 100644 arch/powerpc/platforms/82xx/pq2.c create mode 100644 arch/powerpc/platforms/82xx/pq2.h create mode 100644 arch/powerpc/platforms/83xx/Kconfig create mode 100644 arch/powerpc/platforms/83xx/Makefile create mode 100644 arch/powerpc/platforms/83xx/asp834x.c create mode 100644 arch/powerpc/platforms/83xx/km83xx.c create mode 100644 arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c create mode 100644 arch/powerpc/platforms/83xx/misc.c create mode 100644 arch/powerpc/platforms/83xx/mpc830x_rdb.c create mode 100644 arch/powerpc/platforms/83xx/mpc831x_rdb.c create mode 100644 arch/powerpc/platforms/83xx/mpc832x_rdb.c create mode 100644 arch/powerpc/platforms/83xx/mpc834x_itx.c create mode 100644 arch/powerpc/platforms/83xx/mpc836x_rdk.c create mode 100644 arch/powerpc/platforms/83xx/mpc837x_rdb.c create mode 100644 arch/powerpc/platforms/83xx/mpc83xx.h create mode 100644 arch/powerpc/platforms/83xx/suspend-asm.S create mode 100644 arch/powerpc/platforms/83xx/suspend.c create mode 100644 arch/powerpc/platforms/83xx/usb_831x.c create mode 100644 arch/powerpc/platforms/83xx/usb_834x.c create mode 100644 arch/powerpc/platforms/83xx/usb_837x.c create mode 100644 arch/powerpc/platforms/85xx/Kconfig create mode 100644 arch/powerpc/platforms/85xx/Makefile create mode 100644 arch/powerpc/platforms/85xx/bsc913x_qds.c create mode 100644 arch/powerpc/platforms/85xx/bsc913x_rdb.c create mode 100644 arch/powerpc/platforms/85xx/c293pcie.c create mode 100644 arch/powerpc/platforms/85xx/common.c create mode 100644 arch/powerpc/platforms/85xx/corenet_generic.c create mode 100644 arch/powerpc/platforms/85xx/ge_imp3a.c create mode 100644 arch/powerpc/platforms/85xx/ksi8560.c create mode 100644 arch/powerpc/platforms/85xx/mpc8536_ds.c create mode 100644 arch/powerpc/platforms/85xx/mpc85xx.h create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_8259.c create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_ds.c create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_mds.c create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_rdb.c create mode 100644 arch/powerpc/platforms/85xx/mvme2500.c create mode 100644 arch/powerpc/platforms/85xx/p1010rdb.c create mode 100644 arch/powerpc/platforms/85xx/p1022_ds.c create mode 100644 arch/powerpc/platforms/85xx/p1022_rdk.c create mode 100644 arch/powerpc/platforms/85xx/p1023_rdb.c create mode 100644 arch/powerpc/platforms/85xx/p2020.c create mode 100644 arch/powerpc/platforms/85xx/ppa8548.c create mode 100644 arch/powerpc/platforms/85xx/qemu_e500.c create mode 100644 arch/powerpc/platforms/85xx/sgy_cts1000.c create mode 100644 arch/powerpc/platforms/85xx/smp.c create mode 100644 arch/powerpc/platforms/85xx/smp.h create mode 100644 arch/powerpc/platforms/85xx/socrates.c create mode 100644 arch/powerpc/platforms/85xx/socrates_fpga_pic.c create mode 100644 arch/powerpc/platforms/85xx/socrates_fpga_pic.h create mode 100644 arch/powerpc/platforms/85xx/stx_gp3.c create mode 100644 arch/powerpc/platforms/85xx/t1042rdb_diu.c create mode 100644 arch/powerpc/platforms/85xx/tqm85xx.c create mode 100644 arch/powerpc/platforms/85xx/twr_p102x.c create mode 100644 arch/powerpc/platforms/85xx/xes_mpc85xx.c create mode 100644 arch/powerpc/platforms/86xx/Kconfig create mode 100644 arch/powerpc/platforms/86xx/Makefile create mode 100644 arch/powerpc/platforms/86xx/common.c create mode 100644 arch/powerpc/platforms/86xx/gef_ppc9a.c create mode 100644 arch/powerpc/platforms/86xx/gef_sbc310.c create mode 100644 arch/powerpc/platforms/86xx/gef_sbc610.c create mode 100644 arch/powerpc/platforms/86xx/mpc86xx.h create mode 100644 arch/powerpc/platforms/86xx/mpc86xx_smp.c create mode 100644 arch/powerpc/platforms/86xx/mvme7100.c create mode 100644 arch/powerpc/platforms/86xx/pic.c create mode 100644 arch/powerpc/platforms/8xx/Kconfig create mode 100644 arch/powerpc/platforms/8xx/Makefile create mode 100644 arch/powerpc/platforms/8xx/adder875.c create mode 100644 arch/powerpc/platforms/8xx/cpm1-ic.c create mode 100644 arch/powerpc/platforms/8xx/cpm1.c create mode 100644 arch/powerpc/platforms/8xx/ep88xc.c create mode 100644 arch/powerpc/platforms/8xx/m8xx_setup.c create mode 100644 arch/powerpc/platforms/8xx/machine_check.c create mode 100644 arch/powerpc/platforms/8xx/mpc86xads.h create mode 100644 arch/powerpc/platforms/8xx/mpc86xads_setup.c create mode 100644 arch/powerpc/platforms/8xx/mpc885ads.h create mode 100644 arch/powerpc/platforms/8xx/mpc885ads_setup.c create mode 100644 arch/powerpc/platforms/8xx/mpc8xx.h create mode 100644 arch/powerpc/platforms/8xx/pic.c create mode 100644 arch/powerpc/platforms/8xx/pic.h create mode 100644 arch/powerpc/platforms/8xx/tqm8xx_setup.c create mode 100644 arch/powerpc/platforms/Kconfig create mode 100644 arch/powerpc/platforms/Kconfig.cputype create mode 100644 arch/powerpc/platforms/Makefile create mode 100644 arch/powerpc/platforms/amigaone/Kconfig create mode 100644 arch/powerpc/platforms/amigaone/Makefile create mode 100644 arch/powerpc/platforms/amigaone/setup.c create mode 100644 arch/powerpc/platforms/book3s/Kconfig create mode 100644 arch/powerpc/platforms/book3s/Makefile create mode 100644 arch/powerpc/platforms/book3s/vas-api.c create mode 100644 arch/powerpc/platforms/cell/Kconfig create mode 100644 arch/powerpc/platforms/cell/Makefile create mode 100644 arch/powerpc/platforms/cell/axon_msi.c create mode 100644 arch/powerpc/platforms/cell/cbe_powerbutton.c create mode 100644 arch/powerpc/platforms/cell/cbe_regs.c create mode 100644 arch/powerpc/platforms/cell/cbe_thermal.c create mode 100644 arch/powerpc/platforms/cell/cell.h create mode 100644 arch/powerpc/platforms/cell/cpufreq_spudemand.c create mode 100644 arch/powerpc/platforms/cell/interrupt.c create mode 100644 arch/powerpc/platforms/cell/interrupt.h create mode 100644 arch/powerpc/platforms/cell/iommu.c create mode 100644 arch/powerpc/platforms/cell/pervasive.c create mode 100644 arch/powerpc/platforms/cell/pervasive.h create mode 100644 arch/powerpc/platforms/cell/pmu.c create mode 100644 arch/powerpc/platforms/cell/ras.c create mode 100644 arch/powerpc/platforms/cell/ras.h create mode 100644 arch/powerpc/platforms/cell/setup.c create mode 100644 arch/powerpc/platforms/cell/smp.c create mode 100644 arch/powerpc/platforms/cell/spider-pci.c create mode 100644 arch/powerpc/platforms/cell/spider-pic.c create mode 100644 arch/powerpc/platforms/cell/spu_base.c create mode 100644 arch/powerpc/platforms/cell/spu_callbacks.c create mode 100644 arch/powerpc/platforms/cell/spu_manage.c create mode 100644 arch/powerpc/platforms/cell/spu_priv1_mmio.c create mode 100644 arch/powerpc/platforms/cell/spu_priv1_mmio.h create mode 100644 arch/powerpc/platforms/cell/spu_syscalls.c create mode 100644 arch/powerpc/platforms/cell/spufs/.gitignore create mode 100644 arch/powerpc/platforms/cell/spufs/Makefile create mode 100644 arch/powerpc/platforms/cell/spufs/backing_ops.c create mode 100644 arch/powerpc/platforms/cell/spufs/context.c create mode 100644 arch/powerpc/platforms/cell/spufs/coredump.c create mode 100644 arch/powerpc/platforms/cell/spufs/fault.c create mode 100644 arch/powerpc/platforms/cell/spufs/file.c create mode 100644 arch/powerpc/platforms/cell/spufs/gang.c create mode 100644 arch/powerpc/platforms/cell/spufs/hw_ops.c create mode 100644 arch/powerpc/platforms/cell/spufs/inode.c create mode 100644 arch/powerpc/platforms/cell/spufs/lscsa_alloc.c create mode 100644 arch/powerpc/platforms/cell/spufs/run.c create mode 100644 arch/powerpc/platforms/cell/spufs/sched.c create mode 100644 arch/powerpc/platforms/cell/spufs/spu_restore.c create mode 100644 arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S create mode 100644 arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped create mode 100644 arch/powerpc/platforms/cell/spufs/spu_save.c create mode 100644 arch/powerpc/platforms/cell/spufs/spu_save_crt0.S create mode 100644 arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped create mode 100644 arch/powerpc/platforms/cell/spufs/spu_utils.h create mode 100644 arch/powerpc/platforms/cell/spufs/spufs.h create mode 100644 arch/powerpc/platforms/cell/spufs/sputrace.h create mode 100644 arch/powerpc/platforms/cell/spufs/switch.c create mode 100644 arch/powerpc/platforms/cell/spufs/syscalls.c create mode 100644 arch/powerpc/platforms/chrp/Kconfig create mode 100644 arch/powerpc/platforms/chrp/Makefile create mode 100644 arch/powerpc/platforms/chrp/chrp.h create mode 100644 arch/powerpc/platforms/chrp/gg2.h create mode 100644 arch/powerpc/platforms/chrp/nvram.c create mode 100644 arch/powerpc/platforms/chrp/pci.c create mode 100644 arch/powerpc/platforms/chrp/pegasos_eth.c create mode 100644 arch/powerpc/platforms/chrp/setup.c create mode 100644 arch/powerpc/platforms/chrp/smp.c create mode 100644 arch/powerpc/platforms/chrp/time.c create mode 100644 arch/powerpc/platforms/embedded6xx/Kconfig create mode 100644 arch/powerpc/platforms/embedded6xx/Makefile create mode 100644 arch/powerpc/platforms/embedded6xx/flipper-pic.c create mode 100644 arch/powerpc/platforms/embedded6xx/flipper-pic.h create mode 100644 arch/powerpc/platforms/embedded6xx/gamecube.c create mode 100644 arch/powerpc/platforms/embedded6xx/hlwd-pic.c create mode 100644 arch/powerpc/platforms/embedded6xx/hlwd-pic.h create mode 100644 arch/powerpc/platforms/embedded6xx/holly.c create mode 100644 arch/powerpc/platforms/embedded6xx/linkstation.c create mode 100644 arch/powerpc/platforms/embedded6xx/ls_uart.c create mode 100644 arch/powerpc/platforms/embedded6xx/mpc10x.h create mode 100644 arch/powerpc/platforms/embedded6xx/mvme5100.c create mode 100644 arch/powerpc/platforms/embedded6xx/storcenter.c create mode 100644 arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c create mode 100644 arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h create mode 100644 arch/powerpc/platforms/embedded6xx/wii.c create mode 100644 arch/powerpc/platforms/fsl_uli1575.c create mode 100644 arch/powerpc/platforms/maple/Kconfig create mode 100644 arch/powerpc/platforms/maple/Makefile create mode 100644 arch/powerpc/platforms/maple/maple.h create mode 100644 arch/powerpc/platforms/maple/pci.c create mode 100644 arch/powerpc/platforms/maple/setup.c create mode 100644 arch/powerpc/platforms/maple/time.c create mode 100644 arch/powerpc/platforms/microwatt/Kconfig create mode 100644 arch/powerpc/platforms/microwatt/Makefile create mode 100644 arch/powerpc/platforms/microwatt/microwatt.h create mode 100644 arch/powerpc/platforms/microwatt/rng.c create mode 100644 arch/powerpc/platforms/microwatt/setup.c create mode 100644 arch/powerpc/platforms/pasemi/Kconfig create mode 100644 arch/powerpc/platforms/pasemi/Makefile create mode 100644 arch/powerpc/platforms/pasemi/dma_lib.c create mode 100644 arch/powerpc/platforms/pasemi/gpio_mdio.c create mode 100644 arch/powerpc/platforms/pasemi/idle.c create mode 100644 arch/powerpc/platforms/pasemi/iommu.c create mode 100644 arch/powerpc/platforms/pasemi/misc.c create mode 100644 arch/powerpc/platforms/pasemi/msi.c create mode 100644 arch/powerpc/platforms/pasemi/pasemi.h create mode 100644 arch/powerpc/platforms/pasemi/pci.c create mode 100644 arch/powerpc/platforms/pasemi/powersave.S create mode 100644 arch/powerpc/platforms/pasemi/setup.c create mode 100644 arch/powerpc/platforms/pasemi/time.c create mode 100644 arch/powerpc/platforms/powermac/Kconfig create mode 100644 arch/powerpc/platforms/powermac/Makefile create mode 100644 arch/powerpc/platforms/powermac/backlight.c create mode 100644 arch/powerpc/platforms/powermac/bootx_init.c create mode 100644 arch/powerpc/platforms/powermac/cache.S create mode 100644 arch/powerpc/platforms/powermac/feature.c create mode 100644 arch/powerpc/platforms/powermac/low_i2c.c create mode 100644 arch/powerpc/platforms/powermac/nvram.c create mode 100644 arch/powerpc/platforms/powermac/pci.c create mode 100644 arch/powerpc/platforms/powermac/pfunc_base.c create mode 100644 arch/powerpc/platforms/powermac/pfunc_core.c create mode 100644 arch/powerpc/platforms/powermac/pic.c create mode 100644 arch/powerpc/platforms/powermac/pmac.h create mode 100644 arch/powerpc/platforms/powermac/setup.c create mode 100644 arch/powerpc/platforms/powermac/sleep.S create mode 100644 arch/powerpc/platforms/powermac/smp.c create mode 100644 arch/powerpc/platforms/powermac/time.c create mode 100644 arch/powerpc/platforms/powermac/udbg_adb.c create mode 100644 arch/powerpc/platforms/powermac/udbg_scc.c create mode 100644 arch/powerpc/platforms/powernv/Kconfig create mode 100644 arch/powerpc/platforms/powernv/Makefile create mode 100644 arch/powerpc/platforms/powernv/copy-paste.h create mode 100644 arch/powerpc/platforms/powernv/eeh-powernv.c create mode 100644 arch/powerpc/platforms/powernv/idle.c create mode 100644 arch/powerpc/platforms/powernv/memtrace.c create mode 100644 arch/powerpc/platforms/powernv/ocxl.c create mode 100644 arch/powerpc/platforms/powernv/opal-async.c create mode 100644 arch/powerpc/platforms/powernv/opal-call.c create mode 100644 arch/powerpc/platforms/powernv/opal-core.c create mode 100644 arch/powerpc/platforms/powernv/opal-dump.c create mode 100644 arch/powerpc/platforms/powernv/opal-elog.c create mode 100644 arch/powerpc/platforms/powernv/opal-fadump.c create mode 100644 arch/powerpc/platforms/powernv/opal-fadump.h create mode 100644 arch/powerpc/platforms/powernv/opal-flash.c create mode 100644 arch/powerpc/platforms/powernv/opal-hmi.c create mode 100644 arch/powerpc/platforms/powernv/opal-imc.c create mode 100644 arch/powerpc/platforms/powernv/opal-irqchip.c create mode 100644 arch/powerpc/platforms/powernv/opal-kmsg.c create mode 100644 arch/powerpc/platforms/powernv/opal-lpc.c create mode 100644 arch/powerpc/platforms/powernv/opal-memory-errors.c create mode 100644 arch/powerpc/platforms/powernv/opal-msglog.c create mode 100644 arch/powerpc/platforms/powernv/opal-nvram.c create mode 100644 arch/powerpc/platforms/powernv/opal-power.c create mode 100644 arch/powerpc/platforms/powernv/opal-powercap.c create mode 100644 arch/powerpc/platforms/powernv/opal-prd.c create mode 100644 arch/powerpc/platforms/powernv/opal-psr.c create mode 100644 arch/powerpc/platforms/powernv/opal-rtc.c create mode 100644 arch/powerpc/platforms/powernv/opal-secvar.c create mode 100644 arch/powerpc/platforms/powernv/opal-sensor-groups.c create mode 100644 arch/powerpc/platforms/powernv/opal-sensor.c create mode 100644 arch/powerpc/platforms/powernv/opal-sysparam.c create mode 100644 arch/powerpc/platforms/powernv/opal-tracepoints.c create mode 100644 arch/powerpc/platforms/powernv/opal-wrappers.S create mode 100644 arch/powerpc/platforms/powernv/opal-xscom.c create mode 100644 arch/powerpc/platforms/powernv/opal.c create mode 100644 arch/powerpc/platforms/powernv/pci-cxl.c create mode 100644 arch/powerpc/platforms/powernv/pci-ioda-tce.c create mode 100644 arch/powerpc/platforms/powernv/pci-ioda.c create mode 100644 arch/powerpc/platforms/powernv/pci-sriov.c create mode 100644 arch/powerpc/platforms/powernv/pci.c create mode 100644 arch/powerpc/platforms/powernv/pci.h create mode 100644 arch/powerpc/platforms/powernv/powernv.h create mode 100644 arch/powerpc/platforms/powernv/rng.c create mode 100644 arch/powerpc/platforms/powernv/setup.c create mode 100644 arch/powerpc/platforms/powernv/smp.c create mode 100644 arch/powerpc/platforms/powernv/subcore-asm.S create mode 100644 arch/powerpc/platforms/powernv/subcore.c create mode 100644 arch/powerpc/platforms/powernv/subcore.h create mode 100644 arch/powerpc/platforms/powernv/ultravisor.c create mode 100644 arch/powerpc/platforms/powernv/vas-debug.c create mode 100644 arch/powerpc/platforms/powernv/vas-fault.c create mode 100644 arch/powerpc/platforms/powernv/vas-trace.h create mode 100644 arch/powerpc/platforms/powernv/vas-window.c create mode 100644 arch/powerpc/platforms/powernv/vas.c create mode 100644 arch/powerpc/platforms/powernv/vas.h create mode 100644 arch/powerpc/platforms/ps3/Kconfig create mode 100644 arch/powerpc/platforms/ps3/Makefile create mode 100644 arch/powerpc/platforms/ps3/device-init.c create mode 100644 arch/powerpc/platforms/ps3/exports.c create mode 100644 arch/powerpc/platforms/ps3/gelic_udbg.c create mode 100644 arch/powerpc/platforms/ps3/htab.c create mode 100644 arch/powerpc/platforms/ps3/hvcall.S create mode 100644 arch/powerpc/platforms/ps3/interrupt.c create mode 100644 arch/powerpc/platforms/ps3/mm.c create mode 100644 arch/powerpc/platforms/ps3/os-area.c create mode 100644 arch/powerpc/platforms/ps3/platform.h create mode 100644 arch/powerpc/platforms/ps3/repository.c create mode 100644 arch/powerpc/platforms/ps3/setup.c create mode 100644 arch/powerpc/platforms/ps3/smp.c create mode 100644 arch/powerpc/platforms/ps3/spu.c create mode 100644 arch/powerpc/platforms/ps3/system-bus.c create mode 100644 arch/powerpc/platforms/ps3/time.c create mode 100644 arch/powerpc/platforms/pseries/Kconfig create mode 100644 arch/powerpc/platforms/pseries/Makefile create mode 100644 arch/powerpc/platforms/pseries/cc_platform.c create mode 100644 arch/powerpc/platforms/pseries/cmm.c create mode 100644 arch/powerpc/platforms/pseries/dlpar.c create mode 100644 arch/powerpc/platforms/pseries/dtl.c create mode 100644 arch/powerpc/platforms/pseries/eeh_pseries.c create mode 100644 arch/powerpc/platforms/pseries/event_sources.c create mode 100644 arch/powerpc/platforms/pseries/firmware.c create mode 100644 arch/powerpc/platforms/pseries/hotplug-cpu.c create mode 100644 arch/powerpc/platforms/pseries/hotplug-memory.c create mode 100644 arch/powerpc/platforms/pseries/hvCall.S create mode 100644 arch/powerpc/platforms/pseries/hvCall_inst.c create mode 100644 arch/powerpc/platforms/pseries/hvconsole.c create mode 100644 arch/powerpc/platforms/pseries/hvcserver.c create mode 100644 arch/powerpc/platforms/pseries/ibmebus.c create mode 100644 arch/powerpc/platforms/pseries/io_event_irq.c create mode 100644 arch/powerpc/platforms/pseries/iommu.c create mode 100644 arch/powerpc/platforms/pseries/kexec.c create mode 100644 arch/powerpc/platforms/pseries/lpar.c create mode 100644 arch/powerpc/platforms/pseries/lparcfg.c create mode 100644 arch/powerpc/platforms/pseries/mobility.c create mode 100644 arch/powerpc/platforms/pseries/msi.c create mode 100644 arch/powerpc/platforms/pseries/nvram.c create mode 100644 arch/powerpc/platforms/pseries/of_helpers.c create mode 100644 arch/powerpc/platforms/pseries/of_helpers.h create mode 100644 arch/powerpc/platforms/pseries/papr-sysparm.c create mode 100644 arch/powerpc/platforms/pseries/papr_platform_attributes.c create mode 100644 arch/powerpc/platforms/pseries/papr_scm.c create mode 100644 arch/powerpc/platforms/pseries/pci.c create mode 100644 arch/powerpc/platforms/pseries/pci_dlpar.c create mode 100644 arch/powerpc/platforms/pseries/plpks-secvar.c create mode 100644 arch/powerpc/platforms/pseries/plpks.c create mode 100644 arch/powerpc/platforms/pseries/pmem.c create mode 100644 arch/powerpc/platforms/pseries/power.c create mode 100644 arch/powerpc/platforms/pseries/pseries.h create mode 100644 arch/powerpc/platforms/pseries/pseries_energy.c create mode 100644 arch/powerpc/platforms/pseries/ras.c create mode 100644 arch/powerpc/platforms/pseries/reconfig.c create mode 100644 arch/powerpc/platforms/pseries/rng.c create mode 100644 arch/powerpc/platforms/pseries/rtas-fadump.c create mode 100644 arch/powerpc/platforms/pseries/rtas-fadump.h create mode 100644 arch/powerpc/platforms/pseries/rtas-work-area.c create mode 100644 arch/powerpc/platforms/pseries/setup.c create mode 100644 arch/powerpc/platforms/pseries/smp.c create mode 100644 arch/powerpc/platforms/pseries/suspend.c create mode 100644 arch/powerpc/platforms/pseries/svm.c create mode 100644 arch/powerpc/platforms/pseries/vas-sysfs.c create mode 100644 arch/powerpc/platforms/pseries/vas.c create mode 100644 arch/powerpc/platforms/pseries/vas.h create mode 100644 arch/powerpc/platforms/pseries/vio.c create mode 100644 arch/powerpc/platforms/pseries/vphn.c (limited to 'arch/powerpc/platforms') diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig new file mode 100644 index 0000000000..b3c466c505 --- /dev/null +++ b/arch/powerpc/platforms/40x/Kconfig @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0 +config ACADIA + bool "Acadia" + depends on 40x + select PPC40x_SIMPLE + select 405EZ + help + This option enables support for the AMCC 405EZ Acadia evaluation board. + +config HOTFOOT + bool "Hotfoot" + depends on 40x + select PPC40x_SIMPLE + select FORCE_PCI + help + This option enables support for the ESTEEM 195E Hotfoot board. + +config KILAUEA + bool "Kilauea" + depends on 40x + select 405EX + select PPC40x_SIMPLE + select PPC4xx_PCI_EXPRESS + select FORCE_PCI + select PCI_MSI + help + This option enables support for the AMCC PPC405EX evaluation board. + +config MAKALU + bool "Makalu" + depends on 40x + select 405EX + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + select PPC40x_SIMPLE + help + This option enables support for the AMCC PPC405EX board. + +config OBS600 + bool "OpenBlockS 600" + depends on 40x + select 405EX + select PPC40x_SIMPLE + help + This option enables support for PlatHome OpenBlockS 600 server + +config PPC40x_SIMPLE + bool "Simple PowerPC 40x board support" + depends on 40x + help + This option enables the simple PowerPC 40x platform support. + +config 405EX + bool + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + +config 405EZ + bool + select IBM_EMAC_NO_FLOW_CTRL if IBM_EMAC + select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC + select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC + +config PPC4xx_GPIO + bool "PPC4xx GPIO support" + depends on 40x + select GPIOLIB + select OF_GPIO_MM_GPIOCHIP + help + Enable gpiolib support for ppc40x based boards + +config APM8018X + bool "APM8018X" + depends on 40x + select PPC40x_SIMPLE + help + This option enables support for the AppliedMicro APM8018X evaluation + board. diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile new file mode 100644 index 0000000000..122de98527 --- /dev/null +++ b/arch/powerpc/platforms/40x/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c new file mode 100644 index 0000000000..e454e9d2ef --- /dev/null +++ b/arch/powerpc/platforms/40x/ppc40x_simple.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generic PowerPC 40x platform support + * + * Copyright 2008 IBM Corporation + * + * This implements simple platform support for PowerPC 44x chips. This is + * mostly used for eval boards or other simple and "generic" 44x boards. If + * your board has custom functions or hardware, then you will likely want to + * implement your own board.c file to accommodate it. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static const struct of_device_id ppc40x_of_bus[] __initconst = { + { .compatible = "ibm,plb3", }, + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + { .compatible = "simple-bus", }, + {}, +}; + +static int __init ppc40x_device_probe(void) +{ + of_platform_bus_probe(NULL, ppc40x_of_bus, NULL); + + return 0; +} +machine_device_initcall(ppc40x_simple, ppc40x_device_probe); + +/* This is the list of boards that can be supported by this simple + * platform code. This does _not_ mean the boards are compatible, + * as they most certainly are not from a device tree perspective. + * However, their differences are handled by the device tree and the + * drivers and therefore they don't need custom board support files. + * + * Again, if your board needs to do things differently then create a + * board.c file for it rather than adding it to this list. + */ +static const char * const board[] __initconst = { + "amcc,acadia", + "amcc,haleakala", + "amcc,kilauea", + "amcc,makalu", + "apm,klondike", + "est,hotfoot", + "plathome,obs600", + NULL +}; + +static int __init ppc40x_probe(void) +{ + if (of_device_compatible_match(of_root, board)) { + pci_set_flags(PCI_REASSIGN_ALL_RSRC); + return 1; + } + + return 0; +} + +define_machine(ppc40x_simple) { + .name = "PowerPC 40x Platform", + .probe = ppc40x_probe, + .progress = udbg_progress, + .init_IRQ = uic_init_tree, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/44x.h b/arch/powerpc/platforms/44x/44x.h new file mode 100644 index 0000000000..0e912a6a0b --- /dev/null +++ b/arch/powerpc/platforms/44x/44x.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __POWERPC_PLATFORMS_44X_44X_H +#define __POWERPC_PLATFORMS_44X_44X_H + +extern u8 as1_readb(volatile u8 __iomem *addr); +extern void as1_writeb(u8 data, volatile u8 __iomem *addr); + +#define GPIO0_OSRH 0xC +#define GPIO0_TSRH 0x14 +#define GPIO0_ISR1H 0x34 + +#endif /* __POWERPC_PLATFORMS_44X_44X_H */ diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig new file mode 100644 index 0000000000..35a1f4b9f8 --- /dev/null +++ b/arch/powerpc/platforms/44x/Kconfig @@ -0,0 +1,319 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_47x + bool "Support for 47x variant" + depends on 44x + select MPIC + help + This option enables support for the 47x family of processors and is + not currently compatible with other 44x or 46x variants + +config BAMBOO + bool "Bamboo" + depends on 44x + select PPC44x_SIMPLE + select 440EP + select FORCE_PCI + help + This option enables support for the IBM PPC440EP evaluation board. + +config BLUESTONE + bool "Bluestone" + depends on 44x + select PPC44x_SIMPLE + select APM821xx + select FORCE_PCI + select PCI_MSI + select PPC4xx_PCI_EXPRESS + select IBM_EMAC_RGMII if IBM_EMAC + help + This option enables support for the APM APM821xx Evaluation board. + +config EBONY + bool "Ebony" + depends on 44x + default y + select 440GP + select FORCE_PCI + select OF_RTC + help + This option enables support for the IBM PPC440GP evaluation board. + +config SAM440EP + bool "Sam440ep" + depends on 44x + select 440EP + select FORCE_PCI + help + This option enables support for the ACube Sam440ep board. + +config SEQUOIA + bool "Sequoia" + depends on 44x + select PPC44x_SIMPLE + select 440EPX + help + This option enables support for the AMCC PPC440EPX evaluation board. + +config TAISHAN + bool "Taishan" + depends on 44x + select PPC44x_SIMPLE + select 440GX + select FORCE_PCI + help + This option enables support for the AMCC PPC440GX "Taishan" + evaluation board. + +config KATMAI + bool "Katmai" + depends on 44x + select PPC44x_SIMPLE + select 440SPe + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + select PCI_MSI + help + This option enables support for the AMCC PPC440SPe evaluation board. + +config RAINIER + bool "Rainier" + depends on 44x + select PPC44x_SIMPLE + select 440GRX + select FORCE_PCI + help + This option enables support for the AMCC PPC440GRX evaluation board. + +config WARP + bool "PIKA Warp" + depends on 44x + select 440EP + help + This option enables support for the PIKA Warp(tm) Appliance. The Warp + is a small computer replacement with up to 9 ports of FXO/FXS plus VOIP + stations and trunks. + + See http://www.pikatechnologies.com/ and follow the "PIKA for Computer + Telephony Developers" link for more information. + +config ARCHES + bool "Arches" + depends on 44x + select PPC44x_SIMPLE + select 460EX # Odd since it uses 460GT but the effects are the same + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + help + This option enables support for the AMCC Dual PPC460GT evaluation board. + +config CANYONLANDS + bool "Canyonlands" + depends on 44x + select 460EX + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + select PCI_MSI + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + help + This option enables support for the AMCC PPC460EX evaluation board. + +config GLACIER + bool "Glacier" + depends on 44x + select PPC44x_SIMPLE + select 460EX # Odd since it uses 460GT but the effects are the same + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + help + This option enables support for the AMCC PPC460GT evaluation board. + +config REDWOOD + bool "Redwood" + depends on 44x + select PPC44x_SIMPLE + select 460SX + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + select PCI_MSI + help + This option enables support for the AMCC PPC460SX Redwood board. + +config EIGER + bool "Eiger" + depends on 44x + select PPC44x_SIMPLE + select 460SX + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + select IBM_EMAC_RGMII if IBM_EMAC + help + This option enables support for the AMCC PPC460SX evaluation board. + +config YOSEMITE + bool "Yosemite" + depends on 44x + select PPC44x_SIMPLE + select 440EP + select FORCE_PCI + help + This option enables support for the AMCC PPC440EP evaluation board. + +config ISS4xx + bool "ISS 4xx Simulator" + depends on 44x + select 440GP if 44x && !PPC_47x + select PPC_FPU + select OF_RTC + help + This option enables support for the IBM ISS simulation environment + +config CURRITUCK + bool "IBM Currituck (476fpe) Support" + depends on PPC_47x + select I2C + select SWIOTLB + select 476FPE + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + help + This option enables support for the IBM Currituck (476fpe) evaluation board + +config FSP2 + bool "IBM FSP2 (476fpe) Support" + depends on PPC_47x + select 476FPE + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select COMMON_CLK + select DEFAULT_UIMAGE + help + This option enables support for the IBM FSP2 (476fpe) board + +config AKEBONO + bool "IBM Akebono (476gtr) Support" + depends on PPC_47x + select SWIOTLB + select 476FPE + select PPC4xx_PCI_EXPRESS + select FORCE_PCI + select PCI_MSI + select PPC4xx_HSTA_MSI + select I2C + select I2C_IBM_IIC + select IBM_EMAC_EMAC4 if IBM_EMAC + select USB if USB_SUPPORT + select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD + select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD + help + This option enables support for the IBM Akebono (476gtr) evaluation board + + +config ICON + bool "Icon" + depends on 44x + select PPC44x_SIMPLE + select 440SPe + select FORCE_PCI + select PPC4xx_PCI_EXPRESS + help + This option enables support for the AMCC PPC440SPe evaluation board. + +config PPC44x_SIMPLE + bool "Simple PowerPC 44x board support" + depends on 44x + help + This option enables the simple PowerPC 44x platform support. + +config PPC4xx_GPIO + bool "PPC4xx GPIO support" + depends on 44x + select GPIOLIB + select OF_GPIO_MM_GPIOCHIP + help + Enable gpiolib support for ppc440 based boards + +# 44x specific CPU modules, selected based on the board above. +config 440EP + bool + select PPC_FPU + select IBM440EP_ERR42 + select IBM_EMAC_ZMII if IBM_EMAC + +config 440EPX + bool + select PPC_FPU + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + select USB_EHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_DESC + +config 440GRX + bool + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + +config 440GP + bool + select IBM_EMAC_ZMII if IBM_EMAC + +config 440GX + bool + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC #test only + select IBM_EMAC_TAH if IBM_EMAC #test only + +config 440SP + bool + +config 440SPe + bool + select IBM_EMAC_EMAC4 if IBM_EMAC + +config 460EX + bool + select PPC_FPU + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC + +config 460SX + bool + select PPC_FPU + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC + +config 476FPE + bool + select PPC_FPU + +config APM821xx + bool + select PPC_FPU + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_TAH if IBM_EMAC + +config 476FPE_ERR46 + depends on 476FPE + bool "Enable linker work around for PPC476FPE errata #46" + help + This option enables a work around for an icache bug on 476 + that can cause execution of stale instructions when falling + through pages (IBM errata #46). It requires a recent version + of binutils which supports the --ppc476-workaround option. + + The work around enables the appropriate linker options and + ensures that all module output sections are aligned to 4K + page boundaries. The work around is only required when + building modules. + +# 44x errata/workaround config symbols, selected by the CPU models above +config IBM440EP_ERR42 + bool + diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile new file mode 100644 index 0000000000..5ba031f576 --- /dev/null +++ b/arch/powerpc/platforms/44x/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += misc_44x.o machine_check.o +ifneq ($(CONFIG_PPC4xx_CPM),y) +obj-y += idle.o +endif +obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o +obj-$(CONFIG_EBONY) += ebony.o +obj-$(CONFIG_SAM440EP) += sam440ep.o +obj-$(CONFIG_WARP) += warp.o +obj-$(CONFIG_ISS4xx) += iss4xx.o +obj-$(CONFIG_CANYONLANDS)+= canyonlands.o +obj-$(CONFIG_CURRITUCK) += ppc476.o +obj-$(CONFIG_AKEBONO) += ppc476.o +obj-$(CONFIG_FSP2) += fsp2.o diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c new file mode 100644 index 0000000000..8742a10d9e --- /dev/null +++ b/arch/powerpc/platforms/44x/canyonlands.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This contain platform specific code for APM PPC460EX based Canyonlands + * board. + * + * Copyright (c) 2010, Applied Micro Circuits Corporation + * Author: Rupjyoti Sarmah + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "44x.h" + +#define BCSR_USB_EN 0x11 + +static const struct of_device_id ppc460ex_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + { .compatible = "simple-bus", }, + {}, +}; + +static int __init ppc460ex_device_probe(void) +{ + of_platform_bus_probe(NULL, ppc460ex_of_bus, NULL); + + return 0; +} +machine_device_initcall(canyonlands, ppc460ex_device_probe); + +/* Using this code only for the Canyonlands board. */ + +static int __init ppc460ex_probe(void) +{ + pci_set_flags(PCI_REASSIGN_ALL_RSRC); + + return 1; +} + +/* USB PHY fixup code on Canyonlands kit. */ + +static int __init ppc460ex_canyonlands_fixup(void) +{ + u8 __iomem *bcsr ; + void __iomem *vaddr; + struct device_node *np; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-bcsr"); + if (!np) { + printk(KERN_ERR "failed did not find amcc, ppc460ex bcsr node\n"); + return -ENODEV; + } + + bcsr = of_iomap(np, 0); + of_node_put(np); + + if (!bcsr) { + printk(KERN_CRIT "Could not remap bcsr\n"); + ret = -ENODEV; + goto err_bcsr; + } + + np = of_find_compatible_node(NULL, NULL, "ibm,ppc4xx-gpio"); + if (!np) { + printk(KERN_ERR "failed did not find ibm,ppc4xx-gpio node\n"); + return -ENODEV; + } + + vaddr = of_iomap(np, 0); + of_node_put(np); + + if (!vaddr) { + printk(KERN_CRIT "Could not get gpio node address\n"); + ret = -ENODEV; + goto err_gpio; + } + /* Disable USB, through the BCSR7 bits */ + setbits8(&bcsr[7], BCSR_USB_EN); + + /* Wait for a while after reset */ + msleep(100); + + /* Enable USB here */ + clrbits8(&bcsr[7], BCSR_USB_EN); + + /* + * Configure multiplexed gpio16 and gpio19 as alternate1 output + * source after USB reset. In this configuration gpio16 will be + * USB2HStop and gpio19 will be USB2DStop. For more details refer to + * table 34-7 of PPC460EX user manual. + */ + setbits32((vaddr + GPIO0_OSRH), 0x42000000); + setbits32((vaddr + GPIO0_TSRH), 0x42000000); +err_gpio: + iounmap(vaddr); +err_bcsr: + iounmap(bcsr); + return ret; +} +machine_device_initcall(canyonlands, ppc460ex_canyonlands_fixup); +define_machine(canyonlands) { + .name = "Canyonlands", + .compatible = "amcc,canyonlands", + .probe = ppc460ex_probe, + .progress = udbg_progress, + .init_IRQ = uic_init_tree, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c new file mode 100644 index 0000000000..4861310c8d --- /dev/null +++ b/arch/powerpc/platforms/44x/ebony.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Ebony board specific routines + * + * Matt Porter + * Copyright 2002-2005 MontaVista Software Inc. + * + * Eugene Surovegin or + * Copyright (c) 2003-2005 Zultys Technologies + * + * Rewritten and ported to the merged powerpc tree: + * Copyright 2007 David Gibson , IBM Corporation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static const struct of_device_id ebony_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + {}, +}; + +static int __init ebony_device_probe(void) +{ + of_platform_bus_probe(NULL, ebony_of_bus, NULL); + of_instantiate_rtc(); + + return 0; +} +machine_device_initcall(ebony, ebony_device_probe); + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init ebony_probe(void) +{ + pci_set_flags(PCI_REASSIGN_ALL_RSRC); + + return 1; +} + +define_machine(ebony) { + .name = "Ebony", + .compatible = "ibm,ebony", + .probe = ebony_probe, + .progress = udbg_progress, + .init_IRQ = uic_init_tree, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c new file mode 100644 index 0000000000..f6b8d02e08 --- /dev/null +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * FSP-2 board specific routines + * + * Based on earlier code: + * Matt Porter + * Copyright 2002-2005 MontaVista Software Inc. + * + * Eugene Surovegin or + * Copyright (c) 2003-2005 Zultys Technologies + * + * Rewritten and ported to the merged powerpc tree: + * Copyright 2007 David Gibson , IBM Corporation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "fsp2.h" + +#define FSP2_BUS_ERR "ibm,bus-error-irq" +#define FSP2_CMU_ERR "ibm,cmu-error-irq" +#define FSP2_CONF_ERR "ibm,conf-error-irq" +#define FSP2_OPBD_ERR "ibm,opbd-error-irq" +#define FSP2_MCUE "ibm,mc-ue-irq" +#define FSP2_RST_WRN "ibm,reset-warning-irq" + +static __initdata struct of_device_id fsp2_of_bus[] = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,plb6", }, + { .compatible = "ibm,opb", }, + {}, +}; + +static void l2regs(void) +{ + pr_err("L2 Controller:\n"); + pr_err("MCK: 0x%08x\n", mfl2(L2MCK)); + pr_err("INT: 0x%08x\n", mfl2(L2INT)); + pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0)); + pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1)); + pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0)); + pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1)); + pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2)); + pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT)); + pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0)); + pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0)); + pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1)); + pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2)); + pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT)); + pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0)); + pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1)); + pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2)); + pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3)); + pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4)); + pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5)); +} + +static void show_plbopb_regs(u32 base, int num) +{ + pr_err("\nPLBOPB Bridge %d:\n", num); + pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0)); + pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1)); + pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2)); + pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU)); + pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR)); +} + +static irqreturn_t bus_err_handler(int irq, void *data) +{ + pr_err("Bus Error\n"); + + l2regs(); + + pr_err("\nPLB6 Controller:\n"); + pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD)); + pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR)); + + pr_err("\nPLB6-to-PLB4 Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR)); + pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH)); + pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL)); + + pr_err("\nPLB4-to-PLB6 Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR)); + pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH)); + pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL)); + + pr_err("\nPLB6-to-MCIF Bridge:\n"); + pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0)); + pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1)); + pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH)); + pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL)); + + pr_err("\nPLB4 Arbiter:\n"); + pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH)); + pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL)); + pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH)); + pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH)); + pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH)); + pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL)); + pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH)); + pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH)); + + show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0); + show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1); + show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2); + show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3); + + pr_err("\nPLB4-to-AHB Bridge:\n"); + pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR)); + pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR)); + pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR)); + + pr_err("\nAHB-to-PLB4 Bridge:\n"); + pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR)); + pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR)); + panic("Bus Error\n"); +} + +static irqreturn_t cmu_err_handler(int irq, void *data) { + pr_err("CMU Error\n"); + pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0)); + panic("CMU Error\n"); +} + +static irqreturn_t conf_err_handler(int irq, void *data) { + pr_err("Configuration Logic Error\n"); + pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC)); + pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0)); + pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1)); + panic("Configuration Logic Error\n"); +} + +static irqreturn_t opbd_err_handler(int irq, void *data) { + panic("OPBD Error\n"); +} + +static irqreturn_t mcue_handler(int irq, void *data) { + pr_err("DDR: Uncorrectable Error\n"); + pr_err("MCSTAT: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT)); + pr_err("MCOPT1: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1)); + pr_err("MCOPT2: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2)); + pr_err("PHYSTAT: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT)); + pr_err("CFGR0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0)); + pr_err("CFGR1: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1)); + pr_err("CFGR2: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2)); + pr_err("CFGR3: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3)); + pr_err("SCRUB_CNTL: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL)); + pr_err("ECCERR_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0)); + pr_err("ECCERR_ADDR_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0)); + pr_err("ECCERR_CNT_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0)); + pr_err("ECC_CHECK_PORT0: 0x%08x\n", + mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0)); + pr_err("MCER0: 0x%08x\n", + mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0)); + pr_err("MCER1: 0x%08x\n", + mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1)); + pr_err("BESR: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BESR0)); + pr_err("BEARL: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BEARL)); + pr_err("BEARH: 0x%08x\n", + mfdcr(DCRN_PLB6MCIF_BEARH)); + panic("DDR: Uncorrectable Error\n"); +} + +static irqreturn_t rst_wrn_handler(int irq, void *data) { + u32 crcs = mfcmu(CMUN_CRCS); + switch (crcs & CRCS_STAT_MASK) { + case CRCS_STAT_CHIP_RST_B: + panic("Received chassis-initiated reset request"); + default: + panic("Unknown external reset: CRCS=0x%x", crcs); + } +} + +static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler) +{ + struct device_node *np; + unsigned int irq; + int32_t rc; + + for_each_compatible_node(np, NULL, compat) { + irq = irq_of_parse_and_map(np, 0); + if (!irq) { + pr_err("device tree node %pOFn is missing a interrupt", + np); + of_node_put(np); + return; + } + + rc = request_irq(irq, errirq_handler, 0, np->name, np); + if (rc) { + pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", + np, rc); + of_node_put(np); + return; + } + } +} + +static void __init critical_irq_setup(void) +{ + node_irq_request(FSP2_CMU_ERR, cmu_err_handler); + node_irq_request(FSP2_BUS_ERR, bus_err_handler); + node_irq_request(FSP2_CONF_ERR, conf_err_handler); + node_irq_request(FSP2_OPBD_ERR, opbd_err_handler); + node_irq_request(FSP2_MCUE, mcue_handler); + node_irq_request(FSP2_RST_WRN, rst_wrn_handler); +} + +static int __init fsp2_device_probe(void) +{ + of_platform_bus_probe(NULL, fsp2_of_bus, NULL); + return 0; +} +machine_device_initcall(fsp2, fsp2_device_probe); + +static int __init fsp2_probe(void) +{ + u32 val; + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "ibm,fsp2")) + return 0; + + /* Clear BC_ERR and mask snoopable request plb errors. */ + val = mfdcr(DCRN_PLB6_CR0); + val |= 0x20000000; + mtdcr(DCRN_PLB6_BASE, val); + mtdcr(DCRN_PLB6_HD, 0xffff0000); + mtdcr(DCRN_PLB6_SHD, 0xffff0000); + + /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS + * sleep config bit. As a consequence, TVSENSE will provide erratic + * sensor values, which may result in spurious (parity) errors + * recorded in the CMU FIR and leading to erroneous interrupt requests + * once the CMU interrupt is unmasked. + */ + + /* 1. set TVS1[UNDOZE] */ + val = mfcmu(CMUN_TVS1); + val |= 0x4; + mtcmu(CMUN_TVS1, val); + + /* 2. clear FIR[TVS] and FIR[TVSPAR] */ + val = mfcmu(CMUN_FIR0); + val |= 0x30000000; + mtcmu(CMUN_FIR0, val); + + /* L2 machine checks */ + mtl2(L2PLBMCKEN0, 0xffffffff); + mtl2(L2PLBMCKEN1, 0x0000ffff); + mtl2(L2ARRMCKEN0, 0xffffffff); + mtl2(L2ARRMCKEN1, 0xffffffff); + mtl2(L2ARRMCKEN2, 0xfffff000); + mtl2(L2CPUMCKEN, 0xffffffff); + mtl2(L2RACMCKEN0, 0xffffffff); + mtl2(L2WACMCKEN0, 0xffffffff); + mtl2(L2WACMCKEN1, 0xffffffff); + mtl2(L2WACMCKEN2, 0xffffffff); + mtl2(L2WDFMCKEN, 0xffffffff); + + /* L2 interrupts */ + mtl2(L2PLBINTEN1, 0xffff0000); + + /* + * At a global level, enable all L2 machine checks and interrupts + * reported by the L2 subsystems, except for the external machine check + * input (UIC0.1). + */ + mtl2(L2MCKEN, 0x000007ff); + mtl2(L2INTEN, 0x000004ff); + + /* Enable FSP-2 configuration logic parity errors */ + mtdcr(DCRN_CONF_EIR_RS, 0x80000000); + return 1; +} + +static void __init fsp2_irq_init(void) +{ + uic_init_tree(); + critical_irq_setup(); +} + +define_machine(fsp2) { + .name = "FSP-2", + .probe = fsp2_probe, + .progress = udbg_progress, + .init_IRQ = fsp2_irq_init, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h new file mode 100644 index 0000000000..9e1d52754c --- /dev/null +++ b/arch/powerpc/platforms/44x/fsp2.h @@ -0,0 +1,272 @@ +#ifndef _ASM_POWERPC_FSP_DCR_H_ +#define _ASM_POWERPC_FSP_DCR_H_ +#ifdef __KERNEL__ +#include + +#define DCRN_CMU_ADDR 0x00C /* Chip management unic addr */ +#define DCRN_CMU_DATA 0x00D /* Chip management unic data */ + +/* PLB4 Arbiter */ +#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */ +#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */ +#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */ +#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */ +#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */ +#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */ +#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set*/ +#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High */ +#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */ +#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */ +#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */ +#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */ +#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */ +#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */ +#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set*/ +#define DCRN_PLB4_P1ESRHS 0x01F /*PLB1 Error Status Register High Set*/ + +/* PLB4/OPB bridge 0, 1, 2, 3 */ +#define DCRN_PLB4OPB0_BASE 0x020 +#define DCRN_PLB4OPB1_BASE 0x030 +#define DCRN_PLB4OPB2_BASE 0x040 +#define DCRN_PLB4OPB3_BASE 0x050 + +#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */ +#define PLB4OPB_GEAR 0x2 /* Error Address Register */ +#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */ +#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */ +#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */ + +/* PLB4-to-AHB Bridge */ +#define DCRN_PLB4AHB_BASE 0x400 +#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1) +#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2) +#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3) +#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8) +#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9) + +/* PLB6 Controller */ +#define DCRN_PLB6_BASE 0x11111300 +#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE) +#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B) +#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E) +#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10) + +/* PLB4-to-PLB6 Bridge */ +#define DCRN_PLB4PLB6_BASE 0x11111320 +#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1) +#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3) +#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4) + +/* PLB6-to-PLB4 Bridge */ +#define DCRN_PLB6PLB4_BASE 0x11111350 +#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1) +#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3) +#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4) + +/* PLB6-to-MCIF Bridge */ +#define DCRN_PLB6MCIF_BASE 0x11111380 +#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0) +#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1) +#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2) +#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3) + +/* Configuration Logic Registers */ +#define DCRN_CONF_BASE 0x11111400 +#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A) +#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E) +#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D) +#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E) + +#define DCRN_L2CDCRAI 0x11111100 +#define DCRN_L2CDCRDI 0x11111104 +/* L2 indirect addresses */ +#define L2MCK 0x120 +#define L2MCKEN 0x130 +#define L2INT 0x150 +#define L2INTEN 0x160 +#define L2LOG0 0x180 +#define L2LOG1 0x184 +#define L2LOG2 0x188 +#define L2LOG3 0x18C +#define L2LOG4 0x190 +#define L2LOG5 0x194 +#define L2PLBSTAT0 0x300 +#define L2PLBSTAT1 0x304 +#define L2PLBMCKEN0 0x330 +#define L2PLBMCKEN1 0x334 +#define L2PLBINTEN0 0x360 +#define L2PLBINTEN1 0x364 +#define L2ARRSTAT0 0x500 +#define L2ARRSTAT1 0x504 +#define L2ARRSTAT2 0x508 +#define L2ARRMCKEN0 0x530 +#define L2ARRMCKEN1 0x534 +#define L2ARRMCKEN2 0x538 +#define L2ARRINTEN0 0x560 +#define L2ARRINTEN1 0x564 +#define L2ARRINTEN2 0x568 +#define L2CPUSTAT 0x700 +#define L2CPUMCKEN 0x730 +#define L2CPUINTEN 0x760 +#define L2RACSTAT0 0x900 +#define L2RACMCKEN0 0x930 +#define L2RACINTEN0 0x960 +#define L2WACSTAT0 0xD00 +#define L2WACSTAT1 0xD04 +#define L2WACSTAT2 0xD08 +#define L2WACMCKEN0 0xD30 +#define L2WACMCKEN1 0xD34 +#define L2WACMCKEN2 0xD38 +#define L2WACINTEN0 0xD60 +#define L2WACINTEN1 0xD64 +#define L2WACINTEN2 0xD68 +#define L2WDFSTAT 0xF00 +#define L2WDFMCKEN 0xF30 +#define L2WDFINTEN 0xF60 + +/* DDR3/4 Memory Controller */ +#define DCRN_DDR34_BASE 0x11120000 +#define DCRN_DDR34_MCSTAT 0x10 +#define DCRN_DDR34_MCOPT1 0x20 +#define DCRN_DDR34_MCOPT2 0x21 +#define DCRN_DDR34_PHYSTAT 0x32 +#define DCRN_DDR34_CFGR0 0x40 +#define DCRN_DDR34_CFGR1 0x41 +#define DCRN_DDR34_CFGR2 0x42 +#define DCRN_DDR34_CFGR3 0x43 +#define DCRN_DDR34_SCRUB_CNTL 0xAA +#define DCRN_DDR34_SCRUB_INT 0xAB +#define DCRN_DDR34_SCRUB_START_ADDR 0xB0 +#define DCRN_DDR34_SCRUB_END_ADDR 0xD0 +#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0 +#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1 +#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2 +#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3 +#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4 +#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5 +#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6 +#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7 +#define DCRN_DDR34_ECCERR_PORT0 0xF0 +#define DCRN_DDR34_ECCERR_PORT1 0xF2 +#define DCRN_DDR34_ECCERR_PORT2 0xF4 +#define DCRN_DDR34_ECCERR_PORT3 0xF6 +#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8 +#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9 +#define DCRN_DDR34_ECC_CHECK_PORT2 0xF9 +#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB + +#define DDR34_SCRUB_CNTL_STOP 0x00000000 +#define DDR34_SCRUB_CNTL_SCRUB 0x80000000 +#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000 +#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000 +#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000 + +/* PLB-Attached DDR3/4 Core Wrapper */ +#define DCRN_CW_BASE 0x11111800 +#define DCRN_CW_MCER0 0x00 +#define DCRN_CW_MCER1 0x01 +#define DCRN_CW_MCER_AND0 0x02 +#define DCRN_CW_MCER_AND1 0x03 +#define DCRN_CW_MCER_OR0 0x04 +#define DCRN_CW_MCER_OR1 0x05 +#define DCRN_CW_MCER_MASK0 0x06 +#define DCRN_CW_MCER_MASK1 0x07 +#define DCRN_CW_MCER_MASK_AND0 0x08 +#define DCRN_CW_MCER_MASK_AND1 0x09 +#define DCRN_CW_MCER_MASK_OR0 0x0A +#define DCRN_CW_MCER_MASK_OR1 0x0B +#define DCRN_CW_MCER_ACTION0 0x0C +#define DCRN_CW_MCER_ACTION1 0x0D +#define DCRN_CW_MCER_WOF0 0x0E +#define DCRN_CW_MCER_WOF1 0x0F +#define DCRN_CW_LFIR 0x10 +#define DCRN_CW_LFIR_AND 0x11 +#define DCRN_CW_LFIR_OR 0x12 +#define DCRN_CW_LFIR_MASK 0x13 +#define DCRN_CW_LFIR_MASK_AND 0x14 +#define DCRN_CW_LFIR_MASK_OR 0x15 + +#define CW_MCER0_MEM_CE 0x00020000 +/* CMU addresses */ +#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */ +#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */ +#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */ +#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */ +#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */ +#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */ +#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */ +#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */ +#define CMUN_PW0 0x2C /* Pulse Width Register */ +#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */ +#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */ +#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */ +#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */ +#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */ +#define CMUN_CLS_C 0x32 /* Code Load Status (Clear */ +#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */ +#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */ +#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */ +#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */ +#define CMUN_PCD0 0x37 /* PSI clock divider 0 */ +#define CMUN_PCD1 0x38 /* PSI clock divider 1 */ +#define CMUN_TMR0 0x39 /* Reset Timer */ +#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */ +#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */ +#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */ +#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */ +#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */ +#define CMUN_ETDRB 0x3F /* ETDR Backdoor */ + +/* CRCS bit fields */ +#define CRCS_STAT_MASK 0xF0000000 +#define CRCS_STAT_POR 0x10000000 +#define CRCS_STAT_PHR 0x20000000 +#define CRCS_STAT_PCIE 0x30000000 +#define CRCS_STAT_CRCS_SYS 0x40000000 +#define CRCS_STAT_DBCR_SYS 0x50000000 +#define CRCS_STAT_HOST_SYS 0x60000000 +#define CRCS_STAT_CHIP_RST_B 0x70000000 +#define CRCS_STAT_CRCS_CHIP 0x80000000 +#define CRCS_STAT_DBCR_CHIP 0x90000000 +#define CRCS_STAT_HOST_CHIP 0xA0000000 +#define CRCS_STAT_PSI_CHIP 0xB0000000 +#define CRCS_STAT_CRCS_CORE 0xC0000000 +#define CRCS_STAT_DBCR_CORE 0xD0000000 +#define CRCS_STAT_HOST_CORE 0xE0000000 +#define CRCS_STAT_PCIE_HOT 0xF0000000 +#define CRCS_STAT_SELF_CORE 0x40000000 +#define CRCS_STAT_SELF_CHIP 0x50000000 +#define CRCS_WATCHE 0x08000000 +#define CRCS_CORE 0x04000000 /* Reset PPC440 core */ +#define CRCS_CHIP 0x02000000 /* Chip Reset */ +#define CRCS_SYS 0x01000000 /* System Reset */ +#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */ +#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */ +#define CRCS_PLOCK 0x00000002 /* PLL Locked */ + +#define mtcmu(reg, data) \ +do { \ + mtdcr(DCRN_CMU_ADDR, reg); \ + mtdcr(DCRN_CMU_DATA, data); \ +} while (0) + +#define mfcmu(reg)\ + ({u32 data; \ + mtdcr(DCRN_CMU_ADDR, reg); \ + data = mfdcr(DCRN_CMU_DATA); \ + data; }) + +#define mtl2(reg, data) \ +do { \ + mtdcr(DCRN_L2CDCRAI, reg); \ + mtdcr(DCRN_L2CDCRDI, data); \ +} while (0) + +#define mfl2(reg) \ + ({u32 data; \ + mtdcr(DCRN_L2CDCRAI, reg); \ + data = mfdcr(DCRN_L2CDCRDI); \ + data; }) + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_FSP2_DCR_H_ */ diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c new file mode 100644 index 0000000000..f533b495e7 --- /dev/null +++ b/arch/powerpc/platforms/44x/idle.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2008 IBM Corp. + * + * Based on arch/powerpc/platforms/pasemi/idle.c: + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Added by: Jerone Young + */ + +#include +#include +#include + +static int mode_spin; + +static void ppc44x_idle(void) +{ + unsigned long msr_save; + + msr_save = mfmsr(); + /* set wait state MSR */ + mtmsr(msr_save|MSR_WE|MSR_EE|MSR_CE|MSR_DE); + isync(); + /* return to initial state */ + mtmsr(msr_save); + isync(); +} + +int __init ppc44x_idle_init(void) +{ + if (!mode_spin) { + /* If we are not setting spin mode + then we set to wait mode */ + ppc_md.power_save = &ppc44x_idle; + } + + return 0; +} + +arch_initcall(ppc44x_idle_init); + +static int __init idle_param(char *p) +{ + + if (!strcmp("spin", p)) { + mode_spin = 1; + ppc_md.power_save = NULL; + } + + return 0; +} + +early_param("idle", idle_param); diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c new file mode 100644 index 0000000000..ef883d97fe --- /dev/null +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PPC476 board specific routines + * + * Copyright 2010 Torez Smith, IBM Corporation. + * + * Based on earlier code: + * Matt Porter + * Copyright 2002-2005 MontaVista Software Inc. + * + * Eugene Surovegin or + * Copyright (c) 2003-2005 Zultys Technologies + * + * Rewritten and ported to the merged powerpc tree: + * Copyright 2007 David Gibson , IBM Corporation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct of_device_id iss4xx_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,plb6", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + {}, +}; + +static int __init iss4xx_device_probe(void) +{ + of_platform_bus_probe(NULL, iss4xx_of_bus, NULL); + of_instantiate_rtc(); + + return 0; +} +machine_device_initcall(iss4xx, iss4xx_device_probe); + +/* We can have either UICs or MPICs */ +static void __init iss4xx_init_irq(void) +{ + struct device_node *np; + + /* Find top level interrupt controller */ + for_each_node_with_property(np, "interrupt-controller") { + if (!of_property_present(np, "interrupts")) + break; + } + if (np == NULL) + panic("Can't find top level interrupt controller"); + + /* Check type and do appropriate initialization */ + if (of_device_is_compatible(np, "ibm,uic")) { + uic_init_tree(); + ppc_md.get_irq = uic_get_irq; +#ifdef CONFIG_MPIC + } else if (of_device_is_compatible(np, "chrp,open-pic")) { + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + struct mpic *mpic = mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + ppc_md.get_irq = mpic_get_irq; +#endif + } else + panic("Unrecognized top level interrupt controller"); +} + +#ifdef CONFIG_SMP +static void smp_iss4xx_setup_cpu(int cpu) +{ + mpic_setup_this_cpu(); +} + +static int smp_iss4xx_kick_cpu(int cpu) +{ + struct device_node *cpunode = of_get_cpu_node(cpu, NULL); + const u64 *spin_table_addr_prop; + u32 *spin_table; + extern void start_secondary_47x(void); + + BUG_ON(cpunode == NULL); + + /* Assume spin table. We could test for the enable-method in + * the device-tree but currently there's little point as it's + * our only supported method + */ + spin_table_addr_prop = of_get_property(cpunode, "cpu-release-addr", + NULL); + if (spin_table_addr_prop == NULL) { + pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu); + return -ENOENT; + } + + /* Assume it's mapped as part of the linear mapping. This is a bit + * fishy but will work fine for now + */ + spin_table = (u32 *)__va(*spin_table_addr_prop); + pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table); + + spin_table[3] = cpu; + smp_wmb(); + spin_table[1] = __pa(start_secondary_47x); + mb(); + + return 0; +} + +static struct smp_ops_t iss_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .setup_cpu = smp_iss4xx_setup_cpu, + .kick_cpu = smp_iss4xx_kick_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; + +static void __init iss4xx_smp_init(void) +{ + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + smp_ops = &iss_smp_ops; +} + +#else /* CONFIG_SMP */ +static void __init iss4xx_smp_init(void) { } +#endif /* CONFIG_SMP */ + +static void __init iss4xx_setup_arch(void) +{ + iss4xx_smp_init(); +} + +define_machine(iss4xx) { + .name = "ISS-4xx", + .compatible = "ibm,iss-4xx", + .progress = udbg_progress, + .init_IRQ = iss4xx_init_irq, + .setup_arch = iss4xx_setup_arch, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c new file mode 100644 index 0000000000..5d19daacd7 --- /dev/null +++ b/arch/powerpc/platforms/44x/machine_check.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + */ + +#include +#include +#include + +#include +#include + +int machine_check_440A(struct pt_regs *regs) +{ + unsigned long reason = regs->esr; + + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP){ + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } + else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP){ + flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } + return 0; +} + +#ifdef CONFIG_PPC_47x +int machine_check_47x(struct pt_regs *regs) +{ + unsigned long reason = regs->esr; + u32 mcsr; + + printk(KERN_ERR "Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk(KERN_ERR "Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + return 0; + } + mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk(KERN_ERR "Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk(KERN_ERR "Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk(KERN_ERR "Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk(KERN_ERR "TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk(KERN_ERR "I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk(KERN_ERR "D-Cache Search Parity Error\n"); + if (mcsr & PPC47x_MCSR_GPR) + printk(KERN_ERR "GPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_FPR) + printk(KERN_ERR "FPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_IPR) + printk(KERN_ERR "Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + + return 0; +} +#endif /* CONFIG_PPC_47x */ diff --git a/arch/powerpc/platforms/44x/misc_44x.S b/arch/powerpc/platforms/44x/misc_44x.S new file mode 100644 index 0000000000..3a0c4bd3d6 --- /dev/null +++ b/arch/powerpc/platforms/44x/misc_44x.S @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains miscellaneous low-level functions for PPC 44x. + * Copyright 2007 David Gibson , IBM Corporation. + */ + +#include +#include + + .text + +/* + * Do an IO access in AS1 + */ +_GLOBAL(as1_readb) + mfmsr r7 + ori r0,r7,MSR_DS + sync + mtmsr r0 + sync + isync + lbz r3,0(r3) + sync + mtmsr r7 + sync + isync + blr + +_GLOBAL(as1_writeb) + mfmsr r7 + ori r0,r7,MSR_DS + sync + mtmsr r0 + sync + isync + stb r3,0(r4) + sync + mtmsr r7 + sync + isync + blr diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c new file mode 100644 index 0000000000..971786ff1a --- /dev/null +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generic PowerPC 44x platform support + * + * Copyright 2008 IBM Corporation + * + * This implements simple platform support for PowerPC 44x chips. This is + * mostly used for eval boards or other simple and "generic" 44x boards. If + * your board has custom functions or hardware, then you will likely want to + * implement your own board.c file to accommodate it. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static const struct of_device_id ppc44x_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + { .compatible = "simple-bus", }, + {}, +}; + +static int __init ppc44x_device_probe(void) +{ + of_platform_bus_probe(NULL, ppc44x_of_bus, NULL); + + return 0; +} +machine_device_initcall(ppc44x_simple, ppc44x_device_probe); + +/* This is the list of boards that can be supported by this simple + * platform code. This does _not_ mean the boards are compatible, + * as they most certainly are not from a device tree perspective. + * However, their differences are handled by the device tree and the + * drivers and therefore they don't need custom board support files. + * + * Again, if your board needs to do things differently then create a + * board.c file for it rather than adding it to this list. + */ +static char *board[] __initdata = { + "amcc,arches", + "amcc,bamboo", + "apm,bluestone", + "amcc,glacier", + "ibm,ebony", + "amcc,eiger", + "amcc,katmai", + "amcc,rainier", + "amcc,redwood", + "amcc,sequoia", + "amcc,taishan", + "amcc,yosemite", + "mosaixtech,icon" +}; + +static int __init ppc44x_probe(void) +{ + int i = 0; + + for (i = 0; i < ARRAY_SIZE(board); i++) { + if (of_machine_is_compatible(board[i])) { + pci_set_flags(PCI_REASSIGN_ALL_RSRC); + return 1; + } + } + + return 0; +} + +define_machine(ppc44x_simple) { + .name = "PowerPC 44x Platform", + .probe = ppc44x_probe, + .progress = udbg_progress, + .init_IRQ = uic_init_tree, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c new file mode 100644 index 0000000000..164cbcd458 --- /dev/null +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerPC 476FPE board specific routines + * + * Copyright © 2013 Tony Breeds IBM Corporation + * Copyright © 2013 Alistair Popple IBM Corporation + * + * Based on earlier code: + * Matt Porter + * Copyright 2002-2005 MontaVista Software Inc. + * + * Eugene Surovegin or + * Copyright (c) 2003-2005 Zultys Technologies + * + * Rewritten and ported to the merged powerpc tree: + * Copyright 2007 David Gibson , IBM Corporation. + * Copyright © 2011 David Kliekamp IBM Corporation + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static const struct of_device_id ppc47x_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,plb6", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + {}, +}; + +/* The EEPROM is missing and the default values are bogus. This forces USB in + * to EHCI mode */ +static void quirk_ppc_currituck_usb_fixup(struct pci_dev *dev) +{ + if (of_machine_is_compatible("ibm,currituck")) { + pci_write_config_dword(dev, 0xe0, 0x0114231f); + pci_write_config_dword(dev, 0xe4, 0x00006c40); + } +} +DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup); + +/* Akebono has an AVR microcontroller attached to the I2C bus + * which is used to power off/reset the system. */ + +/* AVR I2C Commands */ +#define AVR_PWRCTL_CMD (0x26) + +/* Flags for the power control I2C commands */ +#define AVR_PWRCTL_PWROFF (0x01) +#define AVR_PWRCTL_RESET (0x02) + +static struct i2c_client *avr_i2c_client; +static void __noreturn avr_halt_system(int pwrctl_flags) +{ + /* Request the AVR to reset the system */ + i2c_smbus_write_byte_data(avr_i2c_client, + AVR_PWRCTL_CMD, pwrctl_flags); + + /* Wait for system to be reset */ + while (1) + ; +} + +static void avr_power_off_system(void) +{ + avr_halt_system(AVR_PWRCTL_PWROFF); +} + +static void __noreturn avr_reset_system(char *cmd) +{ + avr_halt_system(AVR_PWRCTL_RESET); +} + +static int avr_probe(struct i2c_client *client) +{ + avr_i2c_client = client; + ppc_md.restart = avr_reset_system; + pm_power_off = avr_power_off_system; + return 0; +} + +static const struct i2c_device_id avr_id[] = { + { "akebono-avr", 0 }, + { } +}; + +static struct i2c_driver avr_driver = { + .driver = { + .name = "akebono-avr", + }, + .probe = avr_probe, + .id_table = avr_id, +}; + +static int __init ppc47x_device_probe(void) +{ + i2c_add_driver(&avr_driver); + of_platform_bus_probe(NULL, ppc47x_of_bus, NULL); + + return 0; +} +machine_device_initcall(ppc47x_akebono, ppc47x_device_probe); +machine_device_initcall(ppc47x_currituck, ppc47x_device_probe); + +static void __init ppc47x_init_irq(void) +{ + struct device_node *np; + + /* Find top level interrupt controller */ + for_each_node_with_property(np, "interrupt-controller") { + if (!of_property_present(np, "interrupts")) + break; + } + if (np == NULL) + panic("Can't find top level interrupt controller"); + + /* Check type and do appropriate initialization */ + if (of_device_is_compatible(np, "chrp,open-pic")) { + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + struct mpic *mpic = + mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + ppc_md.get_irq = mpic_get_irq; + } else + panic("Unrecognized top level interrupt controller"); + + of_node_put(np); +} + +#ifdef CONFIG_SMP +static void smp_ppc47x_setup_cpu(int cpu) +{ + mpic_setup_this_cpu(); +} + +static int smp_ppc47x_kick_cpu(int cpu) +{ + struct device_node *cpunode = of_get_cpu_node(cpu, NULL); + const u64 *spin_table_addr_prop; + u32 *spin_table; + extern void start_secondary_47x(void); + + BUG_ON(cpunode == NULL); + + /* Assume spin table. We could test for the enable-method in + * the device-tree but currently there's little point as it's + * our only supported method + */ + spin_table_addr_prop = + of_get_property(cpunode, "cpu-release-addr", NULL); + + if (spin_table_addr_prop == NULL) { + pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", + cpu); + return 1; + } + + /* Assume it's mapped as part of the linear mapping. This is a bit + * fishy but will work fine for now + * + * XXX: Is there any reason to assume differently? + */ + spin_table = (u32 *)__va(*spin_table_addr_prop); + pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table); + + spin_table[3] = cpu; + smp_wmb(); + spin_table[1] = __pa(start_secondary_47x); + mb(); + + return 0; +} + +static struct smp_ops_t ppc47x_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .setup_cpu = smp_ppc47x_setup_cpu, + .kick_cpu = smp_ppc47x_kick_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; + +static void __init ppc47x_smp_init(void) +{ + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + smp_ops = &ppc47x_smp_ops; +} + +#else /* CONFIG_SMP */ +static void __init ppc47x_smp_init(void) { } +#endif /* CONFIG_SMP */ + +static void __init ppc47x_setup_arch(void) +{ + + /* No need to check the DMA config as we /know/ our windows are all of + * RAM. Lets hope that doesn't change */ + swiotlb_detect_4g(); + + ppc47x_smp_init(); +} + +static int board_rev = -1; +static int __init ppc47x_get_board_rev(void) +{ + int reg; + u8 __iomem *fpga; + struct device_node *np = NULL; + + if (of_machine_is_compatible("ibm,currituck")) { + np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga"); + reg = 0; + } else if (of_machine_is_compatible("ibm,akebono")) { + np = of_find_compatible_node(NULL, NULL, "ibm,akebono-fpga"); + reg = 2; + } + + if (!np) + goto fail; + + fpga = of_iomap(np, 0); + of_node_put(np); + if (!fpga) + goto fail; + + board_rev = ioread8(fpga + reg) & 0x03; + pr_info("%s: Found board revision %d\n", __func__, board_rev); + iounmap(fpga); + return 0; + +fail: + pr_info("%s: Unable to find board revision\n", __func__); + return 0; +} +machine_arch_initcall(ppc47x_akebono, ppc47x_get_board_rev); +machine_arch_initcall(ppc47x_currituck, ppc47x_get_board_rev); + +/* Use USB controller should have been hardware swizzled but it wasn't :( */ +static void ppc47x_pci_irq_fixup(struct pci_dev *dev) +{ + if (dev->vendor == 0x1033 && (dev->device == 0x0035 || + dev->device == 0x00e0)) { + if (board_rev == 0) { + dev->irq = irq_create_mapping(NULL, 47); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else if (board_rev == 2) { + dev->irq = irq_create_mapping(NULL, 49); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else { + pr_alert("%s: Unknown board revision\n", __func__); + } + } +} + +define_machine(ppc47x_akebono) { + .name = "PowerPC 47x (akebono)", + .compatible = "ibm,akebono", + .progress = udbg_progress, + .init_IRQ = ppc47x_init_irq, + .setup_arch = ppc47x_setup_arch, + .restart = ppc4xx_reset_system, +}; + +define_machine(ppc47x_currituck) { + .name = "PowerPC 47x (currituck)", + .compatible = "ibm,currituck", + .progress = udbg_progress, + .init_IRQ = ppc47x_init_irq, + .pci_irq_fixup = ppc47x_pci_irq_fixup, + .setup_arch = ppc47x_setup_arch, + .restart = ppc4xx_reset_system, +}; diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds new file mode 100644 index 0000000000..9fec5d34ba --- /dev/null +++ b/arch/powerpc/platforms/44x/ppc476_modules.lds @@ -0,0 +1,15 @@ +SECTIONS +{ + .text : ALIGN(4096) + { + *(.text .text.* .fixup) + } + .init.text : ALIGN(4096) + { + *(.init.text .init.text.*) + } + .exit.text : ALIGN(4096) + { + *(.exit.text .exit.text.*) + } +} diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c new file mode 100644 index 0000000000..5cdaa4068e --- /dev/null +++ b/arch/powerpc/platforms/44x/sam440ep.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Sam440ep board specific routines based off bamboo.c code + * original copyrights below + * + * Wade Farnsworth + * Copyright 2004 MontaVista Software Inc. + * + * Rewritten and ported to the merged powerpc tree: + * Josh Boyer + * Copyright 2007 IBM Corporation + * + * Modified from bamboo.c for sam440ep: + * Copyright 2008 Giuseppe Coviello + */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static const struct of_device_id sam440ep_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + {}, +}; + +static int __init sam440ep_device_probe(void) +{ + of_platform_bus_probe(NULL, sam440ep_of_bus, NULL); + + return 0; +} +machine_device_initcall(sam440ep, sam440ep_device_probe); + +static int __init sam440ep_probe(void) +{ + pci_set_flags(PCI_REASSIGN_ALL_RSRC); + + return 1; +} + +define_machine(sam440ep) { + .name = "Sam440ep", + .compatible = "acube,sam440ep", + .probe = sam440ep_probe, + .progress = udbg_progress, + .init_IRQ = uic_init_tree, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; + +static struct i2c_board_info sam440ep_rtc_info = { + .type = "m41st85", + .addr = 0x68, + .irq = -1, +}; + +static int __init sam440ep_setup_rtc(void) +{ + return i2c_register_board_info(0, &sam440ep_rtc_info, 1); +} +machine_device_initcall(sam440ep, sam440ep_setup_rtc); diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c new file mode 100644 index 0000000000..bf0188dcb9 --- /dev/null +++ b/arch/powerpc/platforms/44x/warp.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PIKA Warp(tm) board specific routines + * + * Copyright (c) 2008-2009 PIKA Technologies + * Sean MacLennan + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + + +static const struct of_device_id warp_of_bus[] __initconst = { + { .compatible = "ibm,plb4", }, + { .compatible = "ibm,opb", }, + { .compatible = "ibm,ebc", }, + {}, +}; + +static int __init warp_device_probe(void) +{ + of_platform_bus_probe(NULL, warp_of_bus, NULL); + return 0; +} +machine_device_initcall(warp, warp_device_probe); + +define_machine(warp) { + .name = "Warp", + .compatible = "pika,warp", + .progress = udbg_progress, + .init_IRQ = uic_init_tree, + .get_irq = uic_get_irq, + .restart = ppc4xx_reset_system, +}; + + +static int __init warp_post_info(void) +{ + struct device_node *np; + void __iomem *fpga; + u32 post1, post2; + + /* Sighhhh... POST information is in the sd area. */ + np = of_find_compatible_node(NULL, NULL, "pika,fpga-sd"); + if (np == NULL) + return -ENOENT; + + fpga = of_iomap(np, 0); + of_node_put(np); + if (fpga == NULL) + return -ENOENT; + + post1 = in_be32(fpga + 0x40); + post2 = in_be32(fpga + 0x44); + + iounmap(fpga); + + if (post1 || post2) + printk(KERN_INFO "Warp POST %08x %08x\n", post1, post2); + else + printk(KERN_INFO "Warp POST OK\n"); + + return 0; +} + + +#ifdef CONFIG_SENSORS_AD7414 + +static void __iomem *dtm_fpga; + +#define WARP_GREEN_LED 0 +#define WARP_RED_LED 1 + +static struct gpio_led warp_gpio_led_pins[] = { + [WARP_GREEN_LED] = { + .name = "green", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, + [WARP_RED_LED] = { + .name = "red", + .default_state = LEDS_DEFSTATE_KEEP, + .gpiod = NULL, /* to be filled by pika_setup_leds() */ + }, +}; + +static struct gpio_led_platform_data warp_gpio_led_data = { + .leds = warp_gpio_led_pins, + .num_leds = ARRAY_SIZE(warp_gpio_led_pins), +}; + +static struct platform_device warp_gpio_leds = { + .name = "leds-gpio", + .id = -1, + .dev = { + .platform_data = &warp_gpio_led_data, + }, +}; + +static irqreturn_t temp_isr(int irq, void *context) +{ + int value = 1; + + local_irq_disable(); + + gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0); + + printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n"); + + while (1) { + if (dtm_fpga) { + unsigned reset = in_be32(dtm_fpga + 0x14); + out_be32(dtm_fpga + 0x14, reset); + } + + gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value); + value ^= 1; + mdelay(500); + } + + /* Not reached */ + return IRQ_HANDLED; +} + +/* + * Because green and red power LEDs are normally driven by leds-gpio driver, + * but in case of critical temperature shutdown we want to drive them + * ourselves, we acquire both and then create leds-gpio platform device + * ourselves, instead of doing it through device tree. This way we can still + * keep access to the gpios and use them when needed. + */ +static int pika_setup_leds(void) +{ + struct device_node *np, *child; + struct gpio_desc *gpio; + struct gpio_led *led; + int led_count = 0; + int error; + int i; + + np = of_find_compatible_node(NULL, NULL, "warp-power-leds"); + if (!np) { + printk(KERN_ERR __FILE__ ": Unable to find leds\n"); + return -ENOENT; + } + + for_each_child_of_node(np, child) { + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + + if (!of_node_name_eq(child, led->name)) + continue; + + if (led->gpiod) { + printk(KERN_ERR __FILE__ ": %s led has already been defined\n", + led->name); + continue; + } + + gpio = fwnode_gpiod_get_index(of_fwnode_handle(child), + NULL, 0, GPIOD_ASIS, + led->name); + error = PTR_ERR_OR_ZERO(gpio); + if (error) { + printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n", + led->name, error); + of_node_put(child); + goto err_cleanup_pins; + } + + led->gpiod = gpio; + led_count++; + } + } + + of_node_put(np); + + /* Skip device registration if no leds have been defined */ + if (led_count) { + error = platform_device_register(&warp_gpio_leds); + if (error) { + printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n", + error); + goto err_cleanup_pins; + } + } + + return 0; + +err_cleanup_pins: + for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) { + led = &warp_gpio_led_pins[i]; + gpiod_put(led->gpiod); + led->gpiod = NULL; + } + return error; +} + +static void pika_setup_critical_temp(struct device_node *np, + struct i2c_client *client) +{ + int irq, rc; + + /* Do this before enabling critical temp interrupt since we + * may immediately interrupt. + */ + pika_setup_leds(); + + /* These registers are in 1 degree increments. */ + i2c_smbus_write_byte_data(client, 2, 65); /* Thigh */ + i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */ + + irq = irq_of_parse_and_map(np, 0); + if (!irq) { + printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n"); + return; + } + + rc = request_irq(irq, temp_isr, 0, "ad7414", NULL); + if (rc) { + printk(KERN_ERR __FILE__ + ": Unable to request ad7414 irq %d = %d\n", irq, rc); + return; + } +} + +static inline void pika_dtm_check_fan(void __iomem *fpga) +{ + static int fan_state; + u32 fan = in_be32(fpga + 0x34) & (1 << 14); + + if (fan_state != fan) { + fan_state = fan; + if (fan) + printk(KERN_WARNING "Fan rotation error detected." + " Please check hardware.\n"); + } +} + +static int pika_dtm_thread(void __iomem *fpga) +{ + struct device_node *np; + struct i2c_client *client; + + np = of_find_compatible_node(NULL, NULL, "adi,ad7414"); + if (np == NULL) + return -ENOENT; + + client = of_find_i2c_device_by_node(np); + if (client == NULL) { + of_node_put(np); + return -ENOENT; + } + + pika_setup_critical_temp(np, client); + + of_node_put(np); + + printk(KERN_INFO "Warp DTM thread running.\n"); + + while (!kthread_should_stop()) { + int val; + + val = i2c_smbus_read_word_data(client, 0); + if (val < 0) + dev_dbg(&client->dev, "DTM read temp failed.\n"); + else { + s16 temp = swab16(val); + out_be32(fpga + 0x20, temp); + } + + pika_dtm_check_fan(fpga); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } + + return 0; +} + +static int __init pika_dtm_start(void) +{ + struct task_struct *dtm_thread; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "pika,fpga"); + if (np == NULL) + return -ENOENT; + + dtm_fpga = of_iomap(np, 0); + of_node_put(np); + if (dtm_fpga == NULL) + return -ENOENT; + + /* Must get post info before thread starts. */ + warp_post_info(); + + dtm_thread = kthread_run(pika_dtm_thread, dtm_fpga, "pika-dtm"); + if (IS_ERR(dtm_thread)) { + iounmap(dtm_fpga); + return PTR_ERR(dtm_thread); + } + + return 0; +} +machine_late_initcall(warp, pika_dtm_start); + +#else /* !CONFIG_SENSORS_AD7414 */ + +machine_late_initcall(warp, warp_post_info); + +#endif diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile new file mode 100644 index 0000000000..2071a0abe0 --- /dev/null +++ b/arch/powerpc/platforms/4xx/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += uic.o machine_check.o +obj-$(CONFIG_4xx_SOC) += soc.o +obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o +obj-$(CONFIG_PPC4xx_CPM) += cpm.o +obj-$(CONFIG_PPC4xx_GPIO) += gpio.o diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c new file mode 100644 index 0000000000..670f8ad446 --- /dev/null +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerPC 4xx Clock and Power Management + * + * Copyright (C) 2010, Applied Micro Circuits Corporation + * Victor Gallardo (vgallardo@apm.com) + * + * Based on arch/powerpc/platforms/44x/idle.c: + * Jerone Young + * Copyright 2008 IBM Corp. + * + * Based on arch/powerpc/sysdev/fsl_pmc.c: + * Anton Vorontsov + * Copyright 2009 MontaVista Software, Inc. + * + * See file CREDITS for list of people who contributed to this + * project. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPM_ER 0 +#define CPM_FR 1 +#define CPM_SR 2 + +#define CPM_IDLE_WAIT 0 +#define CPM_IDLE_DOZE 1 + +struct cpm { + dcr_host_t dcr_host; + unsigned int dcr_offset[3]; + unsigned int powersave_off; + unsigned int unused; + unsigned int idle_doze; + unsigned int standby; + unsigned int suspend; +}; + +static struct cpm cpm; + +struct cpm_idle_mode { + unsigned int enabled; + const char *name; +}; + +static struct cpm_idle_mode idle_mode[] = { + [CPM_IDLE_WAIT] = { 1, "wait" }, /* default */ + [CPM_IDLE_DOZE] = { 0, "doze" }, +}; + +static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask) +{ + unsigned int value; + + /* CPM controller supports 3 different types of sleep interface + * known as class 1, 2 and 3. For class 1 units, they are + * unconditionally put to sleep when the corresponding CPM bit is + * set. For class 2 and 3 units this is not case; if they can be + * put to sleep, they will. Here we do not verify, we just + * set them and expect them to eventually go off when they can. + */ + value = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]); + dcr_write(cpm.dcr_host, cpm.dcr_offset[cpm_reg], value | mask); + + /* return old state, to restore later if needed */ + return value; +} + +static void cpm_idle_wait(void) +{ + unsigned long msr_save; + + /* save off initial state */ + msr_save = mfmsr(); + /* sync required when CPM0_ER[CPU] is set */ + mb(); + /* set wait state MSR */ + mtmsr(msr_save|MSR_WE|MSR_EE|MSR_CE|MSR_DE); + isync(); + /* return to initial state */ + mtmsr(msr_save); + isync(); +} + +static void cpm_idle_sleep(unsigned int mask) +{ + unsigned int er_save; + + /* update CPM_ER state */ + er_save = cpm_set(CPM_ER, mask); + + /* go to wait state so that CPM0_ER[CPU] can take effect */ + cpm_idle_wait(); + + /* restore CPM_ER state */ + dcr_write(cpm.dcr_host, cpm.dcr_offset[CPM_ER], er_save); +} + +static void cpm_idle_doze(void) +{ + cpm_idle_sleep(cpm.idle_doze); +} + +static void cpm_idle_config(int mode) +{ + int i; + + if (idle_mode[mode].enabled) + return; + + for (i = 0; i < ARRAY_SIZE(idle_mode); i++) + idle_mode[i].enabled = 0; + + idle_mode[mode].enabled = 1; +} + +static ssize_t cpm_idle_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + char *s = buf; + int i; + + for (i = 0; i < ARRAY_SIZE(idle_mode); i++) { + if (idle_mode[i].enabled) + s += sprintf(s, "[%s] ", idle_mode[i].name); + else + s += sprintf(s, "%s ", idle_mode[i].name); + } + + *(s-1) = '\n'; /* convert the last space to a newline */ + + return s - buf; +} + +static ssize_t cpm_idle_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int i; + char *p; + int len; + + p = memchr(buf, '\n', n); + len = p ? p - buf : n; + + for (i = 0; i < ARRAY_SIZE(idle_mode); i++) { + if (strncmp(buf, idle_mode[i].name, len) == 0) { + cpm_idle_config(i); + return n; + } + } + + return -EINVAL; +} + +static struct kobj_attribute cpm_idle_attr = + __ATTR(idle, 0644, cpm_idle_show, cpm_idle_store); + +static void __init cpm_idle_config_sysfs(void) +{ + struct device *dev; + unsigned long ret; + + dev = get_cpu_device(0); + + ret = sysfs_create_file(&dev->kobj, + &cpm_idle_attr.attr); + if (ret) + printk(KERN_WARNING + "cpm: failed to create idle sysfs entry\n"); +} + +static void cpm_idle(void) +{ + if (idle_mode[CPM_IDLE_DOZE].enabled) + cpm_idle_doze(); + else + cpm_idle_wait(); +} + +static int cpm_suspend_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + return !!cpm.standby; + case PM_SUSPEND_MEM: + return !!cpm.suspend; + default: + return 0; + } +} + +static void cpm_suspend_standby(unsigned int mask) +{ + unsigned long tcr_save; + + /* disable decrement interrupt */ + tcr_save = mfspr(SPRN_TCR); + mtspr(SPRN_TCR, tcr_save & ~TCR_DIE); + + /* go to sleep state */ + cpm_idle_sleep(mask); + + /* restore decrement interrupt */ + mtspr(SPRN_TCR, tcr_save); +} + +static int cpm_suspend_enter(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + cpm_suspend_standby(cpm.standby); + break; + case PM_SUSPEND_MEM: + cpm_suspend_standby(cpm.suspend); + break; + } + + return 0; +} + +static const struct platform_suspend_ops cpm_suspend_ops = { + .valid = cpm_suspend_valid, + .enter = cpm_suspend_enter, +}; + +static int __init cpm_get_uint_property(struct device_node *np, + const char *name) +{ + int len; + const unsigned int *prop = of_get_property(np, name, &len); + + if (prop == NULL || len < sizeof(u32)) + return 0; + + return *prop; +} + +static int __init cpm_init(void) +{ + struct device_node *np; + int dcr_base, dcr_len; + int ret = 0; + + if (!cpm.powersave_off) { + cpm_idle_config(CPM_IDLE_WAIT); + ppc_md.power_save = &cpm_idle; + } + + np = of_find_compatible_node(NULL, NULL, "ibm,cpm"); + if (!np) { + ret = -EINVAL; + goto out; + } + + dcr_base = dcr_resource_start(np, 0); + dcr_len = dcr_resource_len(np, 0); + + if (dcr_base == 0 || dcr_len == 0) { + printk(KERN_ERR "cpm: could not parse dcr property for %pOF\n", + np); + ret = -EINVAL; + goto node_put; + } + + cpm.dcr_host = dcr_map(np, dcr_base, dcr_len); + + if (!DCR_MAP_OK(cpm.dcr_host)) { + printk(KERN_ERR "cpm: failed to map dcr property for %pOF\n", + np); + ret = -EINVAL; + goto node_put; + } + + /* All 4xx SoCs with a CPM controller have one of two + * different order for the CPM registers. Some have the + * CPM registers in the following order (ER,FR,SR). The + * others have them in the following order (SR,ER,FR). + */ + + if (cpm_get_uint_property(np, "er-offset") == 0) { + cpm.dcr_offset[CPM_ER] = 0; + cpm.dcr_offset[CPM_FR] = 1; + cpm.dcr_offset[CPM_SR] = 2; + } else { + cpm.dcr_offset[CPM_ER] = 1; + cpm.dcr_offset[CPM_FR] = 2; + cpm.dcr_offset[CPM_SR] = 0; + } + + /* Now let's see what IPs to turn off for the following modes */ + + cpm.unused = cpm_get_uint_property(np, "unused-units"); + cpm.idle_doze = cpm_get_uint_property(np, "idle-doze"); + cpm.standby = cpm_get_uint_property(np, "standby"); + cpm.suspend = cpm_get_uint_property(np, "suspend"); + + /* If some IPs are unused let's turn them off now */ + + if (cpm.unused) { + cpm_set(CPM_ER, cpm.unused); + cpm_set(CPM_FR, cpm.unused); + } + + /* Now let's export interfaces */ + + if (!cpm.powersave_off && cpm.idle_doze) + cpm_idle_config_sysfs(); + + if (cpm.standby || cpm.suspend) + suspend_set_ops(&cpm_suspend_ops); +node_put: + of_node_put(np); +out: + return ret; +} + +late_initcall(cpm_init); + +static int __init cpm_powersave_off(char *arg) +{ + cpm.powersave_off = 1; + return 1; +} +__setup("powersave=off", cpm_powersave_off); diff --git a/arch/powerpc/platforms/4xx/gpio.c b/arch/powerpc/platforms/4xx/gpio.c new file mode 100644 index 0000000000..e5f2319e5c --- /dev/null +++ b/arch/powerpc/platforms/4xx/gpio.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PPC4xx gpio driver + * + * Copyright (c) 2008 Harris Corporation + * Copyright (c) 2008 Sascha Hauer , Pengutronix + * Copyright (c) MontaVista Software, Inc. 2008. + * + * Author: Steve Falco + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GPIO_MASK(gpio) (0x80000000 >> (gpio)) +#define GPIO_MASK2(gpio) (0xc0000000 >> ((gpio) * 2)) + +/* Physical GPIO register layout */ +struct ppc4xx_gpio { + __be32 or; + __be32 tcr; + __be32 osrl; + __be32 osrh; + __be32 tsrl; + __be32 tsrh; + __be32 odr; + __be32 ir; + __be32 rr1; + __be32 rr2; + __be32 rr3; + __be32 reserved1; + __be32 isr1l; + __be32 isr1h; + __be32 isr2l; + __be32 isr2h; + __be32 isr3l; + __be32 isr3h; +}; + +struct ppc4xx_gpio_chip { + struct of_mm_gpio_chip mm_gc; + spinlock_t lock; +}; + +/* + * GPIO LIB API implementation for GPIOs + * + * There are a maximum of 32 gpios in each gpio controller. + */ + +static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct ppc4xx_gpio __iomem *regs = mm_gc->regs; + + return !!(in_be32(®s->ir) & GPIO_MASK(gpio)); +} + +static inline void +__ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct ppc4xx_gpio __iomem *regs = mm_gc->regs; + + if (val) + setbits32(®s->or, GPIO_MASK(gpio)); + else + clrbits32(®s->or, GPIO_MASK(gpio)); +} + +static void +ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc); + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + + __ppc4xx_gpio_set(gc, gpio, val); + + spin_unlock_irqrestore(&chip->lock, flags); + + pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val); +} + +static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc); + struct ppc4xx_gpio __iomem *regs = mm_gc->regs; + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + + /* Disable open-drain function */ + clrbits32(®s->odr, GPIO_MASK(gpio)); + + /* Float the pin */ + clrbits32(®s->tcr, GPIO_MASK(gpio)); + + /* Bits 0-15 use TSRL/OSRL, bits 16-31 use TSRH/OSRH */ + if (gpio < 16) { + clrbits32(®s->osrl, GPIO_MASK2(gpio)); + clrbits32(®s->tsrl, GPIO_MASK2(gpio)); + } else { + clrbits32(®s->osrh, GPIO_MASK2(gpio)); + clrbits32(®s->tsrh, GPIO_MASK2(gpio)); + } + + spin_unlock_irqrestore(&chip->lock, flags); + + return 0; +} + +static int +ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc); + struct ppc4xx_gpio __iomem *regs = mm_gc->regs; + unsigned long flags; + + spin_lock_irqsave(&chip->lock, flags); + + /* First set initial value */ + __ppc4xx_gpio_set(gc, gpio, val); + + /* Disable open-drain function */ + clrbits32(®s->odr, GPIO_MASK(gpio)); + + /* Drive the pin */ + setbits32(®s->tcr, GPIO_MASK(gpio)); + + /* Bits 0-15 use TSRL, bits 16-31 use TSRH */ + if (gpio < 16) { + clrbits32(®s->osrl, GPIO_MASK2(gpio)); + clrbits32(®s->tsrl, GPIO_MASK2(gpio)); + } else { + clrbits32(®s->osrh, GPIO_MASK2(gpio)); + clrbits32(®s->tsrh, GPIO_MASK2(gpio)); + } + + spin_unlock_irqrestore(&chip->lock, flags); + + pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val); + + return 0; +} + +static int __init ppc4xx_add_gpiochips(void) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, "ibm,ppc4xx-gpio") { + int ret; + struct ppc4xx_gpio_chip *ppc4xx_gc; + struct of_mm_gpio_chip *mm_gc; + struct gpio_chip *gc; + + ppc4xx_gc = kzalloc(sizeof(*ppc4xx_gc), GFP_KERNEL); + if (!ppc4xx_gc) { + ret = -ENOMEM; + goto err; + } + + spin_lock_init(&ppc4xx_gc->lock); + + mm_gc = &ppc4xx_gc->mm_gc; + gc = &mm_gc->gc; + + gc->ngpio = 32; + gc->direction_input = ppc4xx_gpio_dir_in; + gc->direction_output = ppc4xx_gpio_dir_out; + gc->get = ppc4xx_gpio_get; + gc->set = ppc4xx_gpio_set; + + ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc); + if (ret) + goto err; + continue; +err: + pr_err("%pOF: registration failed with status %d\n", np, ret); + kfree(ppc4xx_gc); + /* try others anyway */ + } + return 0; +} +arch_initcall(ppc4xx_add_gpiochips); diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c new file mode 100644 index 0000000000..c6bd846b0d --- /dev/null +++ b/arch/powerpc/platforms/4xx/hsta_msi.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MSI support for PPC4xx SoCs using High Speed Transfer Assist (HSTA) for + * generation of the interrupt. + * + * Copyright © 2013 Alistair Popple IBM Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ppc4xx_hsta_msi { + struct device *dev; + + /* The ioremapped HSTA MSI IO space */ + u32 __iomem *data; + + /* Physical address of HSTA MSI IO space */ + u64 address; + struct msi_bitmap bmp; + + /* An array mapping offsets to hardware IRQs */ + int *irq_map; + + /* Number of hwirqs supported */ + int irq_count; +}; +static struct ppc4xx_hsta_msi ppc4xx_hsta_msi; + +static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + struct msi_msg msg; + struct msi_desc *entry; + int irq, hwirq; + u64 addr; + + /* We don't support MSI-X */ + if (type == PCI_CAP_ID_MSIX) { + pr_debug("%s: MSI-X not supported.\n", __func__); + return -EINVAL; + } + + msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) { + irq = msi_bitmap_alloc_hwirqs(&ppc4xx_hsta_msi.bmp, 1); + if (irq < 0) { + pr_debug("%s: Failed to allocate msi interrupt\n", + __func__); + return irq; + } + + hwirq = ppc4xx_hsta_msi.irq_map[irq]; + if (!hwirq) { + pr_err("%s: Failed mapping irq %d\n", __func__, irq); + return -EINVAL; + } + + /* + * HSTA generates interrupts on writes to 128-bit aligned + * addresses. + */ + addr = ppc4xx_hsta_msi.address + irq*0x10; + msg.address_hi = upper_32_bits(addr); + msg.address_lo = lower_32_bits(addr); + + /* Data is not used by the HSTA. */ + msg.data = 0; + + pr_debug("%s: Setup irq %d (0x%0llx)\n", __func__, hwirq, + (((u64) msg.address_hi) << 32) | msg.address_lo); + + if (irq_set_msi_desc(hwirq, entry)) { + pr_err( + "%s: Invalid hwirq %d specified in device tree\n", + __func__, hwirq); + msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1); + return -EINVAL; + } + pci_write_msi_msg(hwirq, &msg); + } + + return 0; +} + +static int hsta_find_hwirq_offset(int hwirq) +{ + int irq; + + /* Find the offset given the hwirq */ + for (irq = 0; irq < ppc4xx_hsta_msi.irq_count; irq++) + if (ppc4xx_hsta_msi.irq_map[irq] == hwirq) + return irq; + + return -EINVAL; +} + +static void hsta_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + int irq; + + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) { + irq = hsta_find_hwirq_offset(entry->irq); + + /* entry->irq should always be in irq_map */ + BUG_ON(irq < 0); + irq_set_msi_desc(entry->irq, NULL); + msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1); + pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__, + entry->irq, irq); + entry->irq = 0; + } +} + +static int hsta_msi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *mem; + int irq, ret, irq_count; + struct pci_controller *phb; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(dev, "Unable to get mmio space\n"); + return -EINVAL; + } + + irq_count = of_irq_count(dev->of_node); + if (!irq_count) { + dev_err(dev, "Unable to find IRQ range\n"); + return -EINVAL; + } + + ppc4xx_hsta_msi.dev = dev; + ppc4xx_hsta_msi.address = mem->start; + ppc4xx_hsta_msi.data = ioremap(mem->start, resource_size(mem)); + ppc4xx_hsta_msi.irq_count = irq_count; + if (!ppc4xx_hsta_msi.data) { + dev_err(dev, "Unable to map memory\n"); + return -ENOMEM; + } + + ret = msi_bitmap_alloc(&ppc4xx_hsta_msi.bmp, irq_count, dev->of_node); + if (ret) + goto out; + + ppc4xx_hsta_msi.irq_map = kmalloc_array(irq_count, sizeof(int), + GFP_KERNEL); + if (!ppc4xx_hsta_msi.irq_map) { + ret = -ENOMEM; + goto out1; + } + + /* Setup a mapping from irq offsets to hardware irq numbers */ + for (irq = 0; irq < irq_count; irq++) { + ppc4xx_hsta_msi.irq_map[irq] = + irq_of_parse_and_map(dev->of_node, irq); + if (!ppc4xx_hsta_msi.irq_map[irq]) { + dev_err(dev, "Unable to map IRQ\n"); + ret = -EINVAL; + goto out2; + } + } + + list_for_each_entry(phb, &hose_list, list_node) { + phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs; + } + return 0; + +out2: + kfree(ppc4xx_hsta_msi.irq_map); + +out1: + msi_bitmap_free(&ppc4xx_hsta_msi.bmp); + +out: + iounmap(ppc4xx_hsta_msi.data); + return ret; +} + +static const struct of_device_id hsta_msi_ids[] = { + { + .compatible = "ibm,hsta-msi", + }, + {} +}; + +static struct platform_driver hsta_msi_driver = { + .probe = hsta_msi_probe, + .driver = { + .name = "hsta-msi", + .of_match_table = hsta_msi_ids, + }, +}; + +static int hsta_msi_init(void) +{ + return platform_driver_register(&hsta_msi_driver); +} +subsys_initcall(hsta_msi_init); diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c new file mode 100644 index 0000000000..a905da1d6f --- /dev/null +++ b/arch/powerpc/platforms/4xx/machine_check.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + */ + +#include +#include +#include + +#include + +int machine_check_4xx(struct pt_regs *regs) +{ + unsigned long reason = regs->esr; + + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); + + return 0; +} diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c new file mode 100644 index 0000000000..48626615b1 --- /dev/null +++ b/arch/powerpc/platforms/4xx/pci.c @@ -0,0 +1,2182 @@ +/* + * PCI / PCI-X / PCI-Express support for 4xx parts + * + * Copyright 2007 Ben. Herrenschmidt , IBM Corp. + * + * Most PCI Express code is coming from Stefan Roese implementation for + * arch/ppc in the Denx tree, slightly reworked by me. + * + * Copyright 2007 DENX Software Engineering, Stefan Roese + * + * Some of that comes itself from a previous implementation for 440SPE only + * by Roland Dreier: + * + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Roland Dreier + * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +static int dma_offset_set; + +#define U64_TO_U32_LOW(val) ((u32)((val) & 0x00000000ffffffffULL)) +#define U64_TO_U32_HIGH(val) ((u32)((val) >> 32)) + +#define RES_TO_U32_LOW(val) \ + ((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_LOW(val) : (val)) +#define RES_TO_U32_HIGH(val) \ + ((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_HIGH(val) : (0)) + +static inline int ppc440spe_revA(void) +{ + /* Catch both 440SPe variants, with and without RAID6 support */ + if ((mfspr(SPRN_PVR) & 0xffefffff) == 0x53421890) + return 1; + else + return 0; +} + +static void fixup_ppc4xx_pci_bridge(struct pci_dev *dev) +{ + struct pci_controller *hose; + struct resource *r; + + if (dev->devfn != 0 || dev->bus->self != NULL) + return; + + hose = pci_bus_to_host(dev->bus); + if (hose == NULL) + return; + + if (!of_device_is_compatible(hose->dn, "ibm,plb-pciex") && + !of_device_is_compatible(hose->dn, "ibm,plb-pcix") && + !of_device_is_compatible(hose->dn, "ibm,plb-pci")) + return; + + if (of_device_is_compatible(hose->dn, "ibm,plb440epx-pci") || + of_device_is_compatible(hose->dn, "ibm,plb440grx-pci")) { + hose->indirect_type |= PPC_INDIRECT_TYPE_BROKEN_MRM; + } + + /* Hide the PCI host BARs from the kernel as their content doesn't + * fit well in the resource management + */ + pci_dev_for_each_resource(dev, r) { + r->start = r->end = 0; + r->flags = 0; + } + + printk(KERN_INFO "PCI: Hiding 4xx host bridge resources %s\n", + pci_name(dev)); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, fixup_ppc4xx_pci_bridge); + +static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, + void __iomem *reg, + struct resource *res) +{ + u64 size; + const u32 *ranges; + int rlen; + int pna = of_n_addr_cells(hose->dn); + int np = pna + 5; + + /* Default */ + res->start = 0; + size = 0x80000000; + res->end = size - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; + + /* Get dma-ranges property */ + ranges = of_get_property(hose->dn, "dma-ranges", &rlen); + if (ranges == NULL) + goto out; + + /* Walk it */ + while ((rlen -= np * 4) >= 0) { + u32 pci_space = ranges[0]; + u64 pci_addr = of_read_number(ranges + 1, 2); + u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3); + size = of_read_number(ranges + pna + 3, 2); + ranges += np; + if (cpu_addr == OF_BAD_ADDR || size == 0) + continue; + + /* We only care about memory */ + if ((pci_space & 0x03000000) != 0x02000000) + continue; + + /* We currently only support memory at 0, and pci_addr + * within 32 bits space + */ + if (cpu_addr != 0 || pci_addr > 0xffffffff) { + printk(KERN_WARNING "%pOF: Ignored unsupported dma range" + " 0x%016llx...0x%016llx -> 0x%016llx\n", + hose->dn, + pci_addr, pci_addr + size - 1, cpu_addr); + continue; + } + + /* Check if not prefetchable */ + if (!(pci_space & 0x40000000)) + res->flags &= ~IORESOURCE_PREFETCH; + + + /* Use that */ + res->start = pci_addr; + /* Beware of 32 bits resources */ + if (sizeof(resource_size_t) == sizeof(u32) && + (pci_addr + size) > 0x100000000ull) + res->end = 0xffffffff; + else + res->end = res->start + size - 1; + break; + } + + /* We only support one global DMA offset */ + if (dma_offset_set && pci_dram_offset != res->start) { + printk(KERN_ERR "%pOF: dma-ranges(s) mismatch\n", hose->dn); + return -ENXIO; + } + + /* Check that we can fit all of memory as we don't support + * DMA bounce buffers + */ + if (size < total_memory) { + printk(KERN_ERR "%pOF: dma-ranges too small " + "(size=%llx total_memory=%llx)\n", + hose->dn, size, (u64)total_memory); + return -ENXIO; + } + + /* Check we are a power of 2 size and that base is a multiple of size*/ + if ((size & (size - 1)) != 0 || + (res->start & (size - 1)) != 0) { + printk(KERN_ERR "%pOF: dma-ranges unaligned\n", hose->dn); + return -ENXIO; + } + + /* Check that we are fully contained within 32 bits space if we are not + * running on a 460sx or 476fpe which have 64 bit bus addresses. + */ + if (res->end > 0xffffffff && + !(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx") + || of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) { + printk(KERN_ERR "%pOF: dma-ranges outside of 32 bits space\n", + hose->dn); + return -ENXIO; + } + out: + dma_offset_set = 1; + pci_dram_offset = res->start; + hose->dma_window_base_cur = res->start; + hose->dma_window_size = resource_size(res); + + printk(KERN_INFO "4xx PCI DMA offset set to 0x%08lx\n", + pci_dram_offset); + printk(KERN_INFO "4xx PCI DMA window base to 0x%016llx\n", + (unsigned long long)hose->dma_window_base_cur); + printk(KERN_INFO "DMA window size 0x%016llx\n", + (unsigned long long)hose->dma_window_size); + return 0; +} + +/* + * 4xx PCI 2.x part + */ + +static int __init ppc4xx_setup_one_pci_PMM(struct pci_controller *hose, + void __iomem *reg, + u64 plb_addr, + u64 pci_addr, + u64 size, + unsigned int flags, + int index) +{ + u32 ma, pcila, pciha; + + /* Hack warning ! The "old" PCI 2.x cell only let us configure the low + * 32-bit of incoming PLB addresses. The top 4 bits of the 36-bit + * address are actually hard wired to a value that appears to depend + * on the specific SoC. For example, it's 0 on 440EP and 1 on 440EPx. + * + * The trick here is we just crop those top bits and ignore them when + * programming the chip. That means the device-tree has to be right + * for the specific part used (we don't print a warning if it's wrong + * but on the other hand, you'll crash quickly enough), but at least + * this code should work whatever the hard coded value is + */ + plb_addr &= 0xffffffffull; + + /* Note: Due to the above hack, the test below doesn't actually test + * if you address is above 4G, but it tests that address and + * (address + size) are both contained in the same 4G + */ + if ((plb_addr + size) > 0xffffffffull || !is_power_of_2(size) || + size < 0x1000 || (plb_addr & (size - 1)) != 0) { + printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn); + return -1; + } + ma = (0xffffffffu << ilog2(size)) | 1; + if (flags & IORESOURCE_PREFETCH) + ma |= 2; + + pciha = RES_TO_U32_HIGH(pci_addr); + pcila = RES_TO_U32_LOW(pci_addr); + + writel(plb_addr, reg + PCIL0_PMM0LA + (0x10 * index)); + writel(pcila, reg + PCIL0_PMM0PCILA + (0x10 * index)); + writel(pciha, reg + PCIL0_PMM0PCIHA + (0x10 * index)); + writel(ma, reg + PCIL0_PMM0MA + (0x10 * index)); + + return 0; +} + +static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose, + void __iomem *reg) +{ + int i, j, found_isa_hole = 0; + + /* Setup outbound memory windows */ + for (i = j = 0; i < 3; i++) { + struct resource *res = &hose->mem_resources[i]; + resource_size_t offset = hose->mem_offset[i]; + + /* we only care about memory windows */ + if (!(res->flags & IORESOURCE_MEM)) + continue; + if (j > 2) { + printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn); + break; + } + + /* Configure the resource */ + if (ppc4xx_setup_one_pci_PMM(hose, reg, + res->start, + res->start - offset, + resource_size(res), + res->flags, + j) == 0) { + j++; + + /* If the resource PCI address is 0 then we have our + * ISA memory hole + */ + if (res->start == offset) + found_isa_hole = 1; + } + } + + /* Handle ISA memory hole if not already covered */ + if (j <= 2 && !found_isa_hole && hose->isa_mem_size) + if (ppc4xx_setup_one_pci_PMM(hose, reg, hose->isa_mem_phys, 0, + hose->isa_mem_size, 0, j) == 0) + printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n", + hose->dn); +} + +static void __init ppc4xx_configure_pci_PTMs(struct pci_controller *hose, + void __iomem *reg, + const struct resource *res) +{ + resource_size_t size = resource_size(res); + u32 sa; + + /* Calculate window size */ + sa = (0xffffffffu << ilog2(size)) | 1; + sa |= 0x1; + + /* RAM is always at 0 local for now */ + writel(0, reg + PCIL0_PTM1LA); + writel(sa, reg + PCIL0_PTM1MS); + + /* Map on PCI side */ + early_write_config_dword(hose, hose->first_busno, 0, + PCI_BASE_ADDRESS_1, res->start); + early_write_config_dword(hose, hose->first_busno, 0, + PCI_BASE_ADDRESS_2, 0x00000000); + early_write_config_word(hose, hose->first_busno, 0, + PCI_COMMAND, 0x0006); +} + +static void __init ppc4xx_probe_pci_bridge(struct device_node *np) +{ + /* NYI */ + struct resource rsrc_cfg; + struct resource rsrc_reg; + struct resource dma_window; + struct pci_controller *hose = NULL; + void __iomem *reg = NULL; + const int *bus_range; + int primary = 0; + + /* Check if device is enabled */ + if (!of_device_is_available(np)) { + printk(KERN_INFO "%pOF: Port disabled via device-tree\n", np); + return; + } + + /* Fetch config space registers address */ + if (of_address_to_resource(np, 0, &rsrc_cfg)) { + printk(KERN_ERR "%pOF: Can't get PCI config register base !", + np); + return; + } + /* Fetch host bridge internal registers address */ + if (of_address_to_resource(np, 3, &rsrc_reg)) { + printk(KERN_ERR "%pOF: Can't get PCI internal register base !", + np); + return; + } + + /* Check if primary bridge */ + if (of_property_read_bool(np, "primary")) + primary = 1; + + /* Get bus range if any */ + bus_range = of_get_property(np, "bus-range", NULL); + + /* Map registers */ + reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg)); + if (reg == NULL) { + printk(KERN_ERR "%pOF: Can't map registers !", np); + goto fail; + } + + /* Allocate the host controller data structure */ + hose = pcibios_alloc_controller(np); + if (!hose) + goto fail; + + hose->first_busno = bus_range ? bus_range[0] : 0x0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + /* Setup config space */ + setup_indirect_pci(hose, rsrc_cfg.start, rsrc_cfg.start + 0x4, 0); + + /* Disable all windows */ + writel(0, reg + PCIL0_PMM0MA); + writel(0, reg + PCIL0_PMM1MA); + writel(0, reg + PCIL0_PMM2MA); + writel(0, reg + PCIL0_PTM1MS); + writel(0, reg + PCIL0_PTM2MS); + + /* Parse outbound mapping resources */ + pci_process_bridge_OF_ranges(hose, np, primary); + + /* Parse inbound mapping resources */ + if (ppc4xx_parse_dma_ranges(hose, reg, &dma_window) != 0) + goto fail; + + /* Configure outbound ranges POMs */ + ppc4xx_configure_pci_PMMs(hose, reg); + + /* Configure inbound ranges PIMs */ + ppc4xx_configure_pci_PTMs(hose, reg, &dma_window); + + /* We don't need the registers anymore */ + iounmap(reg); + return; + + fail: + if (hose) + pcibios_free_controller(hose); + if (reg) + iounmap(reg); +} + +/* + * 4xx PCI-X part + */ + +static int __init ppc4xx_setup_one_pcix_POM(struct pci_controller *hose, + void __iomem *reg, + u64 plb_addr, + u64 pci_addr, + u64 size, + unsigned int flags, + int index) +{ + u32 lah, lal, pciah, pcial, sa; + + if (!is_power_of_2(size) || size < 0x1000 || + (plb_addr & (size - 1)) != 0) { + printk(KERN_WARNING "%pOF: Resource out of range\n", + hose->dn); + return -1; + } + + /* Calculate register values */ + lah = RES_TO_U32_HIGH(plb_addr); + lal = RES_TO_U32_LOW(plb_addr); + pciah = RES_TO_U32_HIGH(pci_addr); + pcial = RES_TO_U32_LOW(pci_addr); + sa = (0xffffffffu << ilog2(size)) | 0x1; + + /* Program register values */ + if (index == 0) { + writel(lah, reg + PCIX0_POM0LAH); + writel(lal, reg + PCIX0_POM0LAL); + writel(pciah, reg + PCIX0_POM0PCIAH); + writel(pcial, reg + PCIX0_POM0PCIAL); + writel(sa, reg + PCIX0_POM0SA); + } else { + writel(lah, reg + PCIX0_POM1LAH); + writel(lal, reg + PCIX0_POM1LAL); + writel(pciah, reg + PCIX0_POM1PCIAH); + writel(pcial, reg + PCIX0_POM1PCIAL); + writel(sa, reg + PCIX0_POM1SA); + } + + return 0; +} + +static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose, + void __iomem *reg) +{ + int i, j, found_isa_hole = 0; + + /* Setup outbound memory windows */ + for (i = j = 0; i < 3; i++) { + struct resource *res = &hose->mem_resources[i]; + resource_size_t offset = hose->mem_offset[i]; + + /* we only care about memory windows */ + if (!(res->flags & IORESOURCE_MEM)) + continue; + if (j > 1) { + printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn); + break; + } + + /* Configure the resource */ + if (ppc4xx_setup_one_pcix_POM(hose, reg, + res->start, + res->start - offset, + resource_size(res), + res->flags, + j) == 0) { + j++; + + /* If the resource PCI address is 0 then we have our + * ISA memory hole + */ + if (res->start == offset) + found_isa_hole = 1; + } + } + + /* Handle ISA memory hole if not already covered */ + if (j <= 1 && !found_isa_hole && hose->isa_mem_size) + if (ppc4xx_setup_one_pcix_POM(hose, reg, hose->isa_mem_phys, 0, + hose->isa_mem_size, 0, j) == 0) + printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n", + hose->dn); +} + +static void __init ppc4xx_configure_pcix_PIMs(struct pci_controller *hose, + void __iomem *reg, + const struct resource *res, + int big_pim, + int enable_msi_hole) +{ + resource_size_t size = resource_size(res); + u32 sa; + + /* RAM is always at 0 */ + writel(0x00000000, reg + PCIX0_PIM0LAH); + writel(0x00000000, reg + PCIX0_PIM0LAL); + + /* Calculate window size */ + sa = (0xffffffffu << ilog2(size)) | 1; + sa |= 0x1; + if (res->flags & IORESOURCE_PREFETCH) + sa |= 0x2; + if (enable_msi_hole) + sa |= 0x4; + writel(sa, reg + PCIX0_PIM0SA); + if (big_pim) + writel(0xffffffff, reg + PCIX0_PIM0SAH); + + /* Map on PCI side */ + writel(0x00000000, reg + PCIX0_BAR0H); + writel(res->start, reg + PCIX0_BAR0L); + writew(0x0006, reg + PCIX0_COMMAND); +} + +static void __init ppc4xx_probe_pcix_bridge(struct device_node *np) +{ + struct resource rsrc_cfg; + struct resource rsrc_reg; + struct resource dma_window; + struct pci_controller *hose = NULL; + void __iomem *reg = NULL; + const int *bus_range; + int big_pim, msi, primary; + + /* Fetch config space registers address */ + if (of_address_to_resource(np, 0, &rsrc_cfg)) { + printk(KERN_ERR "%pOF: Can't get PCI-X config register base !", + np); + return; + } + /* Fetch host bridge internal registers address */ + if (of_address_to_resource(np, 3, &rsrc_reg)) { + printk(KERN_ERR "%pOF: Can't get PCI-X internal register base !", + np); + return; + } + + /* Check if it supports large PIMs (440GX) */ + big_pim = of_property_read_bool(np, "large-inbound-windows"); + + /* Check if we should enable MSIs inbound hole */ + msi = of_property_read_bool(np, "enable-msi-hole"); + + /* Check if primary bridge */ + primary = of_property_read_bool(np, "primary"); + + /* Get bus range if any */ + bus_range = of_get_property(np, "bus-range", NULL); + + /* Map registers */ + reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg)); + if (reg == NULL) { + printk(KERN_ERR "%pOF: Can't map registers !", np); + goto fail; + } + + /* Allocate the host controller data structure */ + hose = pcibios_alloc_controller(np); + if (!hose) + goto fail; + + hose->first_busno = bus_range ? bus_range[0] : 0x0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + /* Setup config space */ + setup_indirect_pci(hose, rsrc_cfg.start, rsrc_cfg.start + 0x4, + PPC_INDIRECT_TYPE_SET_CFG_TYPE); + + /* Disable all windows */ + writel(0, reg + PCIX0_POM0SA); + writel(0, reg + PCIX0_POM1SA); + writel(0, reg + PCIX0_POM2SA); + writel(0, reg + PCIX0_PIM0SA); + writel(0, reg + PCIX0_PIM1SA); + writel(0, reg + PCIX0_PIM2SA); + if (big_pim) { + writel(0, reg + PCIX0_PIM0SAH); + writel(0, reg + PCIX0_PIM2SAH); + } + + /* Parse outbound mapping resources */ + pci_process_bridge_OF_ranges(hose, np, primary); + + /* Parse inbound mapping resources */ + if (ppc4xx_parse_dma_ranges(hose, reg, &dma_window) != 0) + goto fail; + + /* Configure outbound ranges POMs */ + ppc4xx_configure_pcix_POMs(hose, reg); + + /* Configure inbound ranges PIMs */ + ppc4xx_configure_pcix_PIMs(hose, reg, &dma_window, big_pim, msi); + + /* We don't need the registers anymore */ + iounmap(reg); + return; + + fail: + if (hose) + pcibios_free_controller(hose); + if (reg) + iounmap(reg); +} + +#ifdef CONFIG_PPC4xx_PCI_EXPRESS + +/* + * 4xx PCI-Express part + * + * We support 3 parts currently based on the compatible property: + * + * ibm,plb-pciex-440spe + * ibm,plb-pciex-405ex + * ibm,plb-pciex-460ex + * + * Anything else will be rejected for now as they are all subtly + * different unfortunately. + * + */ + +#define MAX_PCIE_BUS_MAPPED 0x40 + +struct ppc4xx_pciex_port +{ + struct pci_controller *hose; + struct device_node *node; + unsigned int index; + int endpoint; + int link; + int has_ibpre; + unsigned int sdr_base; + dcr_host_t dcrs; + struct resource cfg_space; + struct resource utl_regs; + void __iomem *utl_base; +}; + +static struct ppc4xx_pciex_port *ppc4xx_pciex_ports; +static unsigned int ppc4xx_pciex_port_count; + +struct ppc4xx_pciex_hwops +{ + bool want_sdr; + int (*core_init)(struct device_node *np); + int (*port_init_hw)(struct ppc4xx_pciex_port *port); + int (*setup_utl)(struct ppc4xx_pciex_port *port); + void (*check_link)(struct ppc4xx_pciex_port *port); +}; + +static struct ppc4xx_pciex_hwops *ppc4xx_pciex_hwops; + +static int __init ppc4xx_pciex_wait_on_sdr(struct ppc4xx_pciex_port *port, + unsigned int sdr_offset, + unsigned int mask, + unsigned int value, + int timeout_ms) +{ + u32 val; + + while(timeout_ms--) { + val = mfdcri(SDR0, port->sdr_base + sdr_offset); + if ((val & mask) == value) { + pr_debug("PCIE%d: Wait on SDR %x success with tm %d (%08x)\n", + port->index, sdr_offset, timeout_ms, val); + return 0; + } + msleep(1); + } + return -1; +} + +static int __init ppc4xx_pciex_port_reset_sdr(struct ppc4xx_pciex_port *port) +{ + /* Wait for reset to complete */ + if (ppc4xx_pciex_wait_on_sdr(port, PESDRn_RCSSTS, 1 << 20, 0, 10)) { + printk(KERN_WARNING "PCIE%d: PGRST failed\n", + port->index); + return -1; + } + return 0; +} + + +static void __init ppc4xx_pciex_check_link_sdr(struct ppc4xx_pciex_port *port) +{ + printk(KERN_INFO "PCIE%d: Checking link...\n", port->index); + + /* Check for card presence detect if supported, if not, just wait for + * link unconditionally. + * + * note that we don't fail if there is no link, we just filter out + * config space accesses. That way, it will be easier to implement + * hotplug later on. + */ + if (!port->has_ibpre || + !ppc4xx_pciex_wait_on_sdr(port, PESDRn_LOOP, + 1 << 28, 1 << 28, 100)) { + printk(KERN_INFO + "PCIE%d: Device detected, waiting for link...\n", + port->index); + if (ppc4xx_pciex_wait_on_sdr(port, PESDRn_LOOP, + 0x1000, 0x1000, 2000)) + printk(KERN_WARNING + "PCIE%d: Link up failed\n", port->index); + else { + printk(KERN_INFO + "PCIE%d: link is up !\n", port->index); + port->link = 1; + } + } else + printk(KERN_INFO "PCIE%d: No device detected.\n", port->index); +} + +#ifdef CONFIG_44x + +/* Check various reset bits of the 440SPe PCIe core */ +static int __init ppc440spe_pciex_check_reset(struct device_node *np) +{ + u32 valPE0, valPE1, valPE2; + int err = 0; + + /* SDR0_PEGPLLLCT1 reset */ + if (!(mfdcri(SDR0, PESDR0_PLLLCT1) & 0x01000000)) { + /* + * the PCIe core was probably already initialised + * by firmware - let's re-reset RCSSET regs + * + * -- Shouldn't we also re-reset the whole thing ? -- BenH + */ + pr_debug("PCIE: SDR0_PLLLCT1 already reset.\n"); + mtdcri(SDR0, PESDR0_440SPE_RCSSET, 0x01010000); + mtdcri(SDR0, PESDR1_440SPE_RCSSET, 0x01010000); + mtdcri(SDR0, PESDR2_440SPE_RCSSET, 0x01010000); + } + + valPE0 = mfdcri(SDR0, PESDR0_440SPE_RCSSET); + valPE1 = mfdcri(SDR0, PESDR1_440SPE_RCSSET); + valPE2 = mfdcri(SDR0, PESDR2_440SPE_RCSSET); + + /* SDR0_PExRCSSET rstgu */ + if (!(valPE0 & 0x01000000) || + !(valPE1 & 0x01000000) || + !(valPE2 & 0x01000000)) { + printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstgu error\n"); + err = -1; + } + + /* SDR0_PExRCSSET rstdl */ + if (!(valPE0 & 0x00010000) || + !(valPE1 & 0x00010000) || + !(valPE2 & 0x00010000)) { + printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstdl error\n"); + err = -1; + } + + /* SDR0_PExRCSSET rstpyn */ + if ((valPE0 & 0x00001000) || + (valPE1 & 0x00001000) || + (valPE2 & 0x00001000)) { + printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstpyn error\n"); + err = -1; + } + + /* SDR0_PExRCSSET hldplb */ + if ((valPE0 & 0x10000000) || + (valPE1 & 0x10000000) || + (valPE2 & 0x10000000)) { + printk(KERN_INFO "PCIE: SDR0_PExRCSSET hldplb error\n"); + err = -1; + } + + /* SDR0_PExRCSSET rdy */ + if ((valPE0 & 0x00100000) || + (valPE1 & 0x00100000) || + (valPE2 & 0x00100000)) { + printk(KERN_INFO "PCIE: SDR0_PExRCSSET rdy error\n"); + err = -1; + } + + /* SDR0_PExRCSSET shutdown */ + if ((valPE0 & 0x00000100) || + (valPE1 & 0x00000100) || + (valPE2 & 0x00000100)) { + printk(KERN_INFO "PCIE: SDR0_PExRCSSET shutdown error\n"); + err = -1; + } + + return err; +} + +/* Global PCIe core initializations for 440SPe core */ +static int __init ppc440spe_pciex_core_init(struct device_node *np) +{ + int time_out = 20; + + /* Set PLL clock receiver to LVPECL */ + dcri_clrset(SDR0, PESDR0_PLLLCT1, 0, 1 << 28); + + /* Shouldn't we do all the calibration stuff etc... here ? */ + if (ppc440spe_pciex_check_reset(np)) + return -ENXIO; + + if (!(mfdcri(SDR0, PESDR0_PLLLCT2) & 0x10000)) { + printk(KERN_INFO "PCIE: PESDR_PLLCT2 resistance calibration " + "failed (0x%08x)\n", + mfdcri(SDR0, PESDR0_PLLLCT2)); + return -1; + } + + /* De-assert reset of PCIe PLL, wait for lock */ + dcri_clrset(SDR0, PESDR0_PLLLCT1, 1 << 24, 0); + udelay(3); + + while (time_out) { + if (!(mfdcri(SDR0, PESDR0_PLLLCT3) & 0x10000000)) { + time_out--; + udelay(1); + } else + break; + } + if (!time_out) { + printk(KERN_INFO "PCIE: VCO output not locked\n"); + return -1; + } + + pr_debug("PCIE initialization OK\n"); + + return 3; +} + +static int __init ppc440spe_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + u32 val = 1 << 24; + + if (port->endpoint) + val = PTYPE_LEGACY_ENDPOINT << 20; + else + val = PTYPE_ROOT_PORT << 20; + + if (port->index == 0) + val |= LNKW_X8 << 12; + else + val |= LNKW_X4 << 12; + + mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, val); + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x20222222); + if (ppc440spe_revA()) + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x11000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL0SET1, 0x35000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL1SET1, 0x35000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL2SET1, 0x35000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL3SET1, 0x35000000); + if (port->index == 0) { + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL4SET1, + 0x35000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL5SET1, + 0x35000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL6SET1, + 0x35000000); + mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL7SET1, + 0x35000000); + } + dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET, + (1 << 24) | (1 << 16), 1 << 12); + + return ppc4xx_pciex_port_reset_sdr(port); +} + +static int __init ppc440speA_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + return ppc440spe_pciex_init_port_hw(port); +} + +static int __init ppc440speB_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + int rc = ppc440spe_pciex_init_port_hw(port); + + port->has_ibpre = 1; + + return rc; +} + +static int ppc440speA_pciex_init_utl(struct ppc4xx_pciex_port *port) +{ + /* XXX Check what that value means... I hate magic */ + dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x68782800); + + /* + * Set buffer allocations and then assert VRB and TXE. + */ + out_be32(port->utl_base + PEUTL_OUTTR, 0x08000000); + out_be32(port->utl_base + PEUTL_INTR, 0x02000000); + out_be32(port->utl_base + PEUTL_OPDBSZ, 0x10000000); + out_be32(port->utl_base + PEUTL_PBBSZ, 0x53000000); + out_be32(port->utl_base + PEUTL_IPHBSZ, 0x08000000); + out_be32(port->utl_base + PEUTL_IPDBSZ, 0x10000000); + out_be32(port->utl_base + PEUTL_RCIRQEN, 0x00f00000); + out_be32(port->utl_base + PEUTL_PCTL, 0x80800066); + + return 0; +} + +static int ppc440speB_pciex_init_utl(struct ppc4xx_pciex_port *port) +{ + /* Report CRS to the operating system */ + out_be32(port->utl_base + PEUTL_PBCTL, 0x08000000); + + return 0; +} + +static struct ppc4xx_pciex_hwops ppc440speA_pcie_hwops __initdata = +{ + .want_sdr = true, + .core_init = ppc440spe_pciex_core_init, + .port_init_hw = ppc440speA_pciex_init_port_hw, + .setup_utl = ppc440speA_pciex_init_utl, + .check_link = ppc4xx_pciex_check_link_sdr, +}; + +static struct ppc4xx_pciex_hwops ppc440speB_pcie_hwops __initdata = +{ + .want_sdr = true, + .core_init = ppc440spe_pciex_core_init, + .port_init_hw = ppc440speB_pciex_init_port_hw, + .setup_utl = ppc440speB_pciex_init_utl, + .check_link = ppc4xx_pciex_check_link_sdr, +}; + +static int __init ppc460ex_pciex_core_init(struct device_node *np) +{ + /* Nothing to do, return 2 ports */ + return 2; +} + +static int __init ppc460ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + u32 val; + u32 utlset1; + + if (port->endpoint) + val = PTYPE_LEGACY_ENDPOINT << 20; + else + val = PTYPE_ROOT_PORT << 20; + + if (port->index == 0) { + val |= LNKW_X1 << 12; + utlset1 = 0x20000000; + } else { + val |= LNKW_X4 << 12; + utlset1 = 0x20101101; + } + + mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, val); + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, utlset1); + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01210000); + + switch (port->index) { + case 0: + mtdcri(SDR0, PESDR0_460EX_L0CDRCTL, 0x00003230); + mtdcri(SDR0, PESDR0_460EX_L0DRV, 0x00000130); + mtdcri(SDR0, PESDR0_460EX_L0CLK, 0x00000006); + + mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST,0x10000000); + break; + + case 1: + mtdcri(SDR0, PESDR1_460EX_L0CDRCTL, 0x00003230); + mtdcri(SDR0, PESDR1_460EX_L1CDRCTL, 0x00003230); + mtdcri(SDR0, PESDR1_460EX_L2CDRCTL, 0x00003230); + mtdcri(SDR0, PESDR1_460EX_L3CDRCTL, 0x00003230); + mtdcri(SDR0, PESDR1_460EX_L0DRV, 0x00000130); + mtdcri(SDR0, PESDR1_460EX_L1DRV, 0x00000130); + mtdcri(SDR0, PESDR1_460EX_L2DRV, 0x00000130); + mtdcri(SDR0, PESDR1_460EX_L3DRV, 0x00000130); + mtdcri(SDR0, PESDR1_460EX_L0CLK, 0x00000006); + mtdcri(SDR0, PESDR1_460EX_L1CLK, 0x00000006); + mtdcri(SDR0, PESDR1_460EX_L2CLK, 0x00000006); + mtdcri(SDR0, PESDR1_460EX_L3CLK, 0x00000006); + + mtdcri(SDR0, PESDR1_460EX_PHY_CTL_RST,0x10000000); + break; + } + + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, + mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) | + (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPYN)); + + /* Poll for PHY reset */ + /* XXX FIXME add timeout */ + switch (port->index) { + case 0: + while (!(mfdcri(SDR0, PESDR0_460EX_RSTSTA) & 0x1)) + udelay(10); + break; + case 1: + while (!(mfdcri(SDR0, PESDR1_460EX_RSTSTA) & 0x1)) + udelay(10); + break; + } + + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, + (mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) & + ~(PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL)) | + PESDRx_RCSSET_RSTPYN); + + port->has_ibpre = 1; + + return ppc4xx_pciex_port_reset_sdr(port); +} + +static int ppc460ex_pciex_init_utl(struct ppc4xx_pciex_port *port) +{ + dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0); + + /* + * Set buffer allocations and then assert VRB and TXE. + */ + out_be32(port->utl_base + PEUTL_PBCTL, 0x0800000c); + out_be32(port->utl_base + PEUTL_OUTTR, 0x08000000); + out_be32(port->utl_base + PEUTL_INTR, 0x02000000); + out_be32(port->utl_base + PEUTL_OPDBSZ, 0x04000000); + out_be32(port->utl_base + PEUTL_PBBSZ, 0x00000000); + out_be32(port->utl_base + PEUTL_IPHBSZ, 0x02000000); + out_be32(port->utl_base + PEUTL_IPDBSZ, 0x04000000); + out_be32(port->utl_base + PEUTL_RCIRQEN,0x00f00000); + out_be32(port->utl_base + PEUTL_PCTL, 0x80800066); + + return 0; +} + +static struct ppc4xx_pciex_hwops ppc460ex_pcie_hwops __initdata = +{ + .want_sdr = true, + .core_init = ppc460ex_pciex_core_init, + .port_init_hw = ppc460ex_pciex_init_port_hw, + .setup_utl = ppc460ex_pciex_init_utl, + .check_link = ppc4xx_pciex_check_link_sdr, +}; + +static int __init apm821xx_pciex_core_init(struct device_node *np) +{ + /* Return the number of pcie port */ + return 1; +} + +static int __init apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + u32 val; + + /* + * Do a software reset on PCIe ports. + * This code is to fix the issue that pci drivers doesn't re-assign + * bus number for PCIE devices after Uboot + * scanned and configured all the buses (eg. PCIE NIC IntelPro/1000 + * PT quad port, SAS LSI 1064E) + */ + + mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x0); + mdelay(10); + + if (port->endpoint) + val = PTYPE_LEGACY_ENDPOINT << 20; + else + val = PTYPE_ROOT_PORT << 20; + + val |= LNKW_X1 << 12; + + mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, val); + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000); + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000); + + mtdcri(SDR0, PESDR0_460EX_L0CDRCTL, 0x00003230); + mtdcri(SDR0, PESDR0_460EX_L0DRV, 0x00000130); + mtdcri(SDR0, PESDR0_460EX_L0CLK, 0x00000006); + + mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x10000000); + mdelay(50); + mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x30000000); + + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, + mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) | + (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPYN)); + + /* Poll for PHY reset */ + val = PESDR0_460EX_RSTSTA - port->sdr_base; + if (ppc4xx_pciex_wait_on_sdr(port, val, 0x1, 1, 100)) { + printk(KERN_WARNING "%s: PCIE: Can't reset PHY\n", __func__); + return -EBUSY; + } else { + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, + (mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) & + ~(PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL)) | + PESDRx_RCSSET_RSTPYN); + + port->has_ibpre = 1; + return 0; + } +} + +static struct ppc4xx_pciex_hwops apm821xx_pcie_hwops __initdata = { + .want_sdr = true, + .core_init = apm821xx_pciex_core_init, + .port_init_hw = apm821xx_pciex_init_port_hw, + .setup_utl = ppc460ex_pciex_init_utl, + .check_link = ppc4xx_pciex_check_link_sdr, +}; + +static int __init ppc460sx_pciex_core_init(struct device_node *np) +{ + /* HSS drive amplitude */ + mtdcri(SDR0, PESDR0_460SX_HSSL0DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL1DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL2DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL3DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL4DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL5DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL6DAMP, 0xB9843211); + mtdcri(SDR0, PESDR0_460SX_HSSL7DAMP, 0xB9843211); + + mtdcri(SDR0, PESDR1_460SX_HSSL0DAMP, 0xB9843211); + mtdcri(SDR0, PESDR1_460SX_HSSL1DAMP, 0xB9843211); + mtdcri(SDR0, PESDR1_460SX_HSSL2DAMP, 0xB9843211); + mtdcri(SDR0, PESDR1_460SX_HSSL3DAMP, 0xB9843211); + + mtdcri(SDR0, PESDR2_460SX_HSSL0DAMP, 0xB9843211); + mtdcri(SDR0, PESDR2_460SX_HSSL1DAMP, 0xB9843211); + mtdcri(SDR0, PESDR2_460SX_HSSL2DAMP, 0xB9843211); + mtdcri(SDR0, PESDR2_460SX_HSSL3DAMP, 0xB9843211); + + /* HSS TX pre-emphasis */ + mtdcri(SDR0, PESDR0_460SX_HSSL0COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL1COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL2COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL3COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL4COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL5COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL6COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR0_460SX_HSSL7COEFA, 0xDCB98987); + + mtdcri(SDR0, PESDR1_460SX_HSSL0COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR1_460SX_HSSL1COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR1_460SX_HSSL2COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR1_460SX_HSSL3COEFA, 0xDCB98987); + + mtdcri(SDR0, PESDR2_460SX_HSSL0COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR2_460SX_HSSL1COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR2_460SX_HSSL2COEFA, 0xDCB98987); + mtdcri(SDR0, PESDR2_460SX_HSSL3COEFA, 0xDCB98987); + + /* HSS TX calibration control */ + mtdcri(SDR0, PESDR0_460SX_HSSL1CALDRV, 0x22222222); + mtdcri(SDR0, PESDR1_460SX_HSSL1CALDRV, 0x22220000); + mtdcri(SDR0, PESDR2_460SX_HSSL1CALDRV, 0x22220000); + + /* HSS TX slew control */ + mtdcri(SDR0, PESDR0_460SX_HSSSLEW, 0xFFFFFFFF); + mtdcri(SDR0, PESDR1_460SX_HSSSLEW, 0xFFFF0000); + mtdcri(SDR0, PESDR2_460SX_HSSSLEW, 0xFFFF0000); + + /* Set HSS PRBS enabled */ + mtdcri(SDR0, PESDR0_460SX_HSSCTLSET, 0x00001130); + mtdcri(SDR0, PESDR2_460SX_HSSCTLSET, 0x00001130); + + udelay(100); + + /* De-assert PLLRESET */ + dcri_clrset(SDR0, PESDR0_PLLLCT2, 0x00000100, 0); + + /* Reset DL, UTL, GPL before configuration */ + mtdcri(SDR0, PESDR0_460SX_RCSSET, + PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU); + mtdcri(SDR0, PESDR1_460SX_RCSSET, + PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU); + mtdcri(SDR0, PESDR2_460SX_RCSSET, + PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU); + + udelay(100); + + /* + * If bifurcation is not enabled, u-boot would have disabled the + * third PCIe port + */ + if (((mfdcri(SDR0, PESDR1_460SX_HSSCTLSET) & 0x00000001) == + 0x00000001)) { + printk(KERN_INFO "PCI: PCIE bifurcation setup successfully.\n"); + printk(KERN_INFO "PCI: Total 3 PCIE ports are present\n"); + return 3; + } + + printk(KERN_INFO "PCI: Total 2 PCIE ports are present\n"); + return 2; +} + +static int __init ppc460sx_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + + if (port->endpoint) + dcri_clrset(SDR0, port->sdr_base + PESDRn_UTLSET2, + 0x01000000, 0); + else + dcri_clrset(SDR0, port->sdr_base + PESDRn_UTLSET2, + 0, 0x01000000); + + dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET, + (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL), + PESDRx_RCSSET_RSTPYN); + + port->has_ibpre = 1; + + return ppc4xx_pciex_port_reset_sdr(port); +} + +static int ppc460sx_pciex_init_utl(struct ppc4xx_pciex_port *port) +{ + /* Max 128 Bytes */ + out_be32 (port->utl_base + PEUTL_PBBSZ, 0x00000000); + /* Assert VRB and TXE - per datasheet turn off addr validation */ + out_be32(port->utl_base + PEUTL_PCTL, 0x80800000); + return 0; +} + +static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) +{ + void __iomem *mbase; + int attempt = 50; + + port->link = 0; + + mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000); + if (mbase == NULL) { + printk(KERN_ERR "%pOF: Can't map internal config space !", + port->node); + return; + } + + while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA) + & PECFG_460SX_DLLSTA_LINKUP))) { + attempt--; + mdelay(10); + } + if (attempt) + port->link = 1; + iounmap(mbase); +} + +static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { + .want_sdr = true, + .core_init = ppc460sx_pciex_core_init, + .port_init_hw = ppc460sx_pciex_init_port_hw, + .setup_utl = ppc460sx_pciex_init_utl, + .check_link = ppc460sx_pciex_check_link, +}; + +#endif /* CONFIG_44x */ + +#ifdef CONFIG_40x + +static int __init ppc405ex_pciex_core_init(struct device_node *np) +{ + /* Nothing to do, return 2 ports */ + return 2; +} + +static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) +{ + /* Assert the PE0_PHY reset */ + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000); + msleep(1); + + /* deassert the PE0_hotreset */ + if (port->endpoint) + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01111000); + else + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01101000); + + /* poll for phy !reset */ + /* XXX FIXME add timeout */ + while (!(mfdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSTA) & 0x00001000)) + ; + + /* deassert the PE0_gpl_utl_reset */ + mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x00101000); +} + +static int __init ppc405ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port) +{ + u32 val; + + if (port->endpoint) + val = PTYPE_LEGACY_ENDPOINT; + else + val = PTYPE_ROOT_PORT; + + mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, + 1 << 24 | val << 20 | LNKW_X1 << 12); + + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000); + mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000); + mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET1, 0x720F0000); + mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET2, 0x70600003); + + /* + * Only reset the PHY when no link is currently established. + * This is for the Atheros PCIe board which has problems to establish + * the link (again) after this PHY reset. All other currently tested + * PCIe boards don't show this problem. + * This has to be re-tested and fixed in a later release! + */ + val = mfdcri(SDR0, port->sdr_base + PESDRn_LOOP); + if (!(val & 0x00001000)) + ppc405ex_pcie_phy_reset(port); + + dcr_write(port->dcrs, DCRO_PEGPL_CFG, 0x10000000); /* guarded on */ + + port->has_ibpre = 1; + + return ppc4xx_pciex_port_reset_sdr(port); +} + +static int ppc405ex_pciex_init_utl(struct ppc4xx_pciex_port *port) +{ + dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0); + + /* + * Set buffer allocations and then assert VRB and TXE. + */ + out_be32(port->utl_base + PEUTL_OUTTR, 0x02000000); + out_be32(port->utl_base + PEUTL_INTR, 0x02000000); + out_be32(port->utl_base + PEUTL_OPDBSZ, 0x04000000); + out_be32(port->utl_base + PEUTL_PBBSZ, 0x21000000); + out_be32(port->utl_base + PEUTL_IPHBSZ, 0x02000000); + out_be32(port->utl_base + PEUTL_IPDBSZ, 0x04000000); + out_be32(port->utl_base + PEUTL_RCIRQEN, 0x00f00000); + out_be32(port->utl_base + PEUTL_PCTL, 0x80800066); + + out_be32(port->utl_base + PEUTL_PBCTL, 0x08000000); + + return 0; +} + +static struct ppc4xx_pciex_hwops ppc405ex_pcie_hwops __initdata = +{ + .want_sdr = true, + .core_init = ppc405ex_pciex_core_init, + .port_init_hw = ppc405ex_pciex_init_port_hw, + .setup_utl = ppc405ex_pciex_init_utl, + .check_link = ppc4xx_pciex_check_link_sdr, +}; + +#endif /* CONFIG_40x */ + +#ifdef CONFIG_476FPE +static int __init ppc_476fpe_pciex_core_init(struct device_node *np) +{ + return 4; +} + +static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port) +{ + u32 timeout_ms = 20; + u32 val = 0, mask = (PECFG_TLDLP_LNKUP|PECFG_TLDLP_PRESENT); + void __iomem *mbase = ioremap(port->cfg_space.start + 0x10000000, + 0x1000); + + printk(KERN_INFO "PCIE%d: Checking link...\n", port->index); + + if (mbase == NULL) { + printk(KERN_WARNING "PCIE%d: failed to get cfg space\n", + port->index); + return; + } + + while (timeout_ms--) { + val = in_le32(mbase + PECFG_TLDLP); + + if ((val & mask) == mask) + break; + msleep(10); + } + + if (val & PECFG_TLDLP_PRESENT) { + printk(KERN_INFO "PCIE%d: link is up !\n", port->index); + port->link = 1; + } else + printk(KERN_WARNING "PCIE%d: Link up failed\n", port->index); + + iounmap(mbase); +} + +static struct ppc4xx_pciex_hwops ppc_476fpe_pcie_hwops __initdata = +{ + .core_init = ppc_476fpe_pciex_core_init, + .check_link = ppc_476fpe_pciex_check_link, +}; +#endif /* CONFIG_476FPE */ + +/* Check that the core has been initied and if not, do it */ +static int __init ppc4xx_pciex_check_core_init(struct device_node *np) +{ + static int core_init; + int count = -ENODEV; + + if (core_init++) + return 0; + +#ifdef CONFIG_44x + if (of_device_is_compatible(np, "ibm,plb-pciex-440spe")) { + if (ppc440spe_revA()) + ppc4xx_pciex_hwops = &ppc440speA_pcie_hwops; + else + ppc4xx_pciex_hwops = &ppc440speB_pcie_hwops; + } + if (of_device_is_compatible(np, "ibm,plb-pciex-460ex")) + ppc4xx_pciex_hwops = &ppc460ex_pcie_hwops; + if (of_device_is_compatible(np, "ibm,plb-pciex-460sx")) + ppc4xx_pciex_hwops = &ppc460sx_pcie_hwops; + if (of_device_is_compatible(np, "ibm,plb-pciex-apm821xx")) + ppc4xx_pciex_hwops = &apm821xx_pcie_hwops; +#endif /* CONFIG_44x */ +#ifdef CONFIG_40x + if (of_device_is_compatible(np, "ibm,plb-pciex-405ex")) + ppc4xx_pciex_hwops = &ppc405ex_pcie_hwops; +#endif +#ifdef CONFIG_476FPE + if (of_device_is_compatible(np, "ibm,plb-pciex-476fpe") + || of_device_is_compatible(np, "ibm,plb-pciex-476gtr")) + ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops; +#endif + if (ppc4xx_pciex_hwops == NULL) { + printk(KERN_WARNING "PCIE: unknown host type %pOF\n", np); + return -ENODEV; + } + + count = ppc4xx_pciex_hwops->core_init(np); + if (count > 0) { + ppc4xx_pciex_ports = + kcalloc(count, sizeof(struct ppc4xx_pciex_port), + GFP_KERNEL); + if (ppc4xx_pciex_ports) { + ppc4xx_pciex_port_count = count; + return 0; + } + printk(KERN_WARNING "PCIE: failed to allocate ports array\n"); + return -ENOMEM; + } + return -ENODEV; +} + +static void __init ppc4xx_pciex_port_init_mapping(struct ppc4xx_pciex_port *port) +{ + /* We map PCI Express configuration based on the reg property */ + dcr_write(port->dcrs, DCRO_PEGPL_CFGBAH, + RES_TO_U32_HIGH(port->cfg_space.start)); + dcr_write(port->dcrs, DCRO_PEGPL_CFGBAL, + RES_TO_U32_LOW(port->cfg_space.start)); + + /* XXX FIXME: Use size from reg property. For now, map 512M */ + dcr_write(port->dcrs, DCRO_PEGPL_CFGMSK, 0xe0000001); + + /* We map UTL registers based on the reg property */ + dcr_write(port->dcrs, DCRO_PEGPL_REGBAH, + RES_TO_U32_HIGH(port->utl_regs.start)); + dcr_write(port->dcrs, DCRO_PEGPL_REGBAL, + RES_TO_U32_LOW(port->utl_regs.start)); + + /* XXX FIXME: Use size from reg property */ + dcr_write(port->dcrs, DCRO_PEGPL_REGMSK, 0x00007001); + + /* Disable all other outbound windows */ + dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL, 0); + dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKL, 0); + dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKL, 0); + dcr_write(port->dcrs, DCRO_PEGPL_MSGMSK, 0); +} + +static int __init ppc4xx_pciex_port_init(struct ppc4xx_pciex_port *port) +{ + int rc = 0; + + /* Init HW */ + if (ppc4xx_pciex_hwops->port_init_hw) + rc = ppc4xx_pciex_hwops->port_init_hw(port); + if (rc != 0) + return rc; + + /* + * Initialize mapping: disable all regions and configure + * CFG and REG regions based on resources in the device tree + */ + ppc4xx_pciex_port_init_mapping(port); + + if (ppc4xx_pciex_hwops->check_link) + ppc4xx_pciex_hwops->check_link(port); + + /* + * Map UTL + */ + port->utl_base = ioremap(port->utl_regs.start, 0x100); + BUG_ON(port->utl_base == NULL); + + /* + * Setup UTL registers --BenH. + */ + if (ppc4xx_pciex_hwops->setup_utl) + ppc4xx_pciex_hwops->setup_utl(port); + + /* + * Check for VC0 active or PLL Locked and assert RDY. + */ + if (port->sdr_base) { + if (of_device_is_compatible(port->node, + "ibm,plb-pciex-460sx")){ + if (port->link && ppc4xx_pciex_wait_on_sdr(port, + PESDRn_RCSSTS, + 1 << 12, 1 << 12, 5000)) { + printk(KERN_INFO "PCIE%d: PLL not locked\n", + port->index); + port->link = 0; + } + } else if (port->link && + ppc4xx_pciex_wait_on_sdr(port, PESDRn_RCSSTS, + 1 << 16, 1 << 16, 5000)) { + printk(KERN_INFO "PCIE%d: VC0 not active\n", + port->index); + port->link = 0; + } + + dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET, 0, 1 << 20); + } + + msleep(100); + + return 0; +} + +static int ppc4xx_pciex_validate_bdf(struct ppc4xx_pciex_port *port, + struct pci_bus *bus, + unsigned int devfn) +{ + static int message; + + /* Endpoint can not generate upstream(remote) config cycles */ + if (port->endpoint && bus->number != port->hose->first_busno) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* Check we are within the mapped range */ + if (bus->number > port->hose->last_busno) { + if (!message) { + printk(KERN_WARNING "Warning! Probing bus %u" + " out of range !\n", bus->number); + message++; + } + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* The root complex has only one device / function */ + if (bus->number == port->hose->first_busno && devfn != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* The other side of the RC has only one device as well */ + if (bus->number == (port->hose->first_busno + 1) && + PCI_SLOT(devfn) != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* Check if we have a link */ + if ((bus->number != port->hose->first_busno) && !port->link) + return PCIBIOS_DEVICE_NOT_FOUND; + + return 0; +} + +static void __iomem *ppc4xx_pciex_get_config_base(struct ppc4xx_pciex_port *port, + struct pci_bus *bus, + unsigned int devfn) +{ + int relbus; + + /* Remove the casts when we finally remove the stupid volatile + * in struct pci_controller + */ + if (bus->number == port->hose->first_busno) + return (void __iomem *)port->hose->cfg_addr; + + relbus = bus->number - (port->hose->first_busno + 1); + return (void __iomem *)port->hose->cfg_data + + ((relbus << 20) | (devfn << 12)); +} + +static int ppc4xx_pciex_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct ppc4xx_pciex_port *port = + &ppc4xx_pciex_ports[hose->indirect_type]; + void __iomem *addr; + u32 gpl_cfg; + + BUG_ON(hose != port->hose); + + if (ppc4xx_pciex_validate_bdf(port, bus, devfn) != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = ppc4xx_pciex_get_config_base(port, bus, devfn); + + /* + * Reading from configuration space of non-existing device can + * generate transaction errors. For the read duration we suppress + * assertion of machine check exceptions to avoid those. + */ + gpl_cfg = dcr_read(port->dcrs, DCRO_PEGPL_CFG); + dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg | GPL_DMER_MASK_DISA); + + /* Make sure no CRS is recorded */ + out_be32(port->utl_base + PEUTL_RCSTA, 0x00040000); + + switch (len) { + case 1: + *val = in_8((u8 *)(addr + offset)); + break; + case 2: + *val = in_le16((u16 *)(addr + offset)); + break; + default: + *val = in_le32((u32 *)(addr + offset)); + break; + } + + pr_debug("pcie-config-read: bus=%3d [%3d..%3d] devfn=0x%04x" + " offset=0x%04x len=%d, addr=0x%p val=0x%08x\n", + bus->number, hose->first_busno, hose->last_busno, + devfn, offset, len, addr + offset, *val); + + /* Check for CRS (440SPe rev B does that for us but heh ..) */ + if (in_be32(port->utl_base + PEUTL_RCSTA) & 0x00040000) { + pr_debug("Got CRS !\n"); + if (len != 4 || offset != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + *val = 0xffff0001; + } + + dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg); + + return PCIBIOS_SUCCESSFUL; +} + +static int ppc4xx_pciex_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct ppc4xx_pciex_port *port = + &ppc4xx_pciex_ports[hose->indirect_type]; + void __iomem *addr; + u32 gpl_cfg; + + if (ppc4xx_pciex_validate_bdf(port, bus, devfn) != 0) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = ppc4xx_pciex_get_config_base(port, bus, devfn); + + /* + * Reading from configuration space of non-existing device can + * generate transaction errors. For the read duration we suppress + * assertion of machine check exceptions to avoid those. + */ + gpl_cfg = dcr_read(port->dcrs, DCRO_PEGPL_CFG); + dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg | GPL_DMER_MASK_DISA); + + pr_debug("pcie-config-write: bus=%3d [%3d..%3d] devfn=0x%04x" + " offset=0x%04x len=%d, addr=0x%p val=0x%08x\n", + bus->number, hose->first_busno, hose->last_busno, + devfn, offset, len, addr + offset, val); + + switch (len) { + case 1: + out_8((u8 *)(addr + offset), val); + break; + case 2: + out_le16((u16 *)(addr + offset), val); + break; + default: + out_le32((u32 *)(addr + offset), val); + break; + } + + dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops ppc4xx_pciex_pci_ops = +{ + .read = ppc4xx_pciex_read_config, + .write = ppc4xx_pciex_write_config, +}; + +static int __init ppc4xx_setup_one_pciex_POM(struct ppc4xx_pciex_port *port, + struct pci_controller *hose, + void __iomem *mbase, + u64 plb_addr, + u64 pci_addr, + u64 size, + unsigned int flags, + int index) +{ + u32 lah, lal, pciah, pcial, sa; + + if (!is_power_of_2(size) || + (index < 2 && size < 0x100000) || + (index == 2 && size < 0x100) || + (plb_addr & (size - 1)) != 0) { + printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn); + return -1; + } + + /* Calculate register values */ + lah = RES_TO_U32_HIGH(plb_addr); + lal = RES_TO_U32_LOW(plb_addr); + pciah = RES_TO_U32_HIGH(pci_addr); + pcial = RES_TO_U32_LOW(pci_addr); + sa = (0xffffffffu << ilog2(size)) | 0x1; + + /* Program register values */ + switch (index) { + case 0: + out_le32(mbase + PECFG_POM0LAH, pciah); + out_le32(mbase + PECFG_POM0LAL, pcial); + dcr_write(port->dcrs, DCRO_PEGPL_OMR1BAH, lah); + dcr_write(port->dcrs, DCRO_PEGPL_OMR1BAL, lal); + dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKH, 0x7fffffff); + /*Enabled and single region */ + if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx")) + dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL, + sa | DCRO_PEGPL_460SX_OMR1MSKL_UOT + | DCRO_PEGPL_OMRxMSKL_VAL); + else if (of_device_is_compatible( + port->node, "ibm,plb-pciex-476fpe") || + of_device_is_compatible( + port->node, "ibm,plb-pciex-476gtr")) + dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL, + sa | DCRO_PEGPL_476FPE_OMR1MSKL_UOT + | DCRO_PEGPL_OMRxMSKL_VAL); + else + dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL, + sa | DCRO_PEGPL_OMR1MSKL_UOT + | DCRO_PEGPL_OMRxMSKL_VAL); + break; + case 1: + out_le32(mbase + PECFG_POM1LAH, pciah); + out_le32(mbase + PECFG_POM1LAL, pcial); + dcr_write(port->dcrs, DCRO_PEGPL_OMR2BAH, lah); + dcr_write(port->dcrs, DCRO_PEGPL_OMR2BAL, lal); + dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKH, 0x7fffffff); + dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKL, + sa | DCRO_PEGPL_OMRxMSKL_VAL); + break; + case 2: + out_le32(mbase + PECFG_POM2LAH, pciah); + out_le32(mbase + PECFG_POM2LAL, pcial); + dcr_write(port->dcrs, DCRO_PEGPL_OMR3BAH, lah); + dcr_write(port->dcrs, DCRO_PEGPL_OMR3BAL, lal); + dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKH, 0x7fffffff); + /* Note that 3 here means enabled | IO space !!! */ + dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKL, + sa | DCRO_PEGPL_OMR3MSKL_IO + | DCRO_PEGPL_OMRxMSKL_VAL); + break; + } + + return 0; +} + +static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port, + struct pci_controller *hose, + void __iomem *mbase) +{ + int i, j, found_isa_hole = 0; + + /* Setup outbound memory windows */ + for (i = j = 0; i < 3; i++) { + struct resource *res = &hose->mem_resources[i]; + resource_size_t offset = hose->mem_offset[i]; + + /* we only care about memory windows */ + if (!(res->flags & IORESOURCE_MEM)) + continue; + if (j > 1) { + printk(KERN_WARNING "%pOF: Too many ranges\n", + port->node); + break; + } + + /* Configure the resource */ + if (ppc4xx_setup_one_pciex_POM(port, hose, mbase, + res->start, + res->start - offset, + resource_size(res), + res->flags, + j) == 0) { + j++; + + /* If the resource PCI address is 0 then we have our + * ISA memory hole + */ + if (res->start == offset) + found_isa_hole = 1; + } + } + + /* Handle ISA memory hole if not already covered */ + if (j <= 1 && !found_isa_hole && hose->isa_mem_size) + if (ppc4xx_setup_one_pciex_POM(port, hose, mbase, + hose->isa_mem_phys, 0, + hose->isa_mem_size, 0, j) == 0) + printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n", + hose->dn); + + /* Configure IO, always 64K starting at 0. We hard wire it to 64K ! + * Note also that it -has- to be region index 2 on this HW + */ + if (hose->io_resource.flags & IORESOURCE_IO) + ppc4xx_setup_one_pciex_POM(port, hose, mbase, + hose->io_base_phys, 0, + 0x10000, IORESOURCE_IO, 2); +} + +static void __init ppc4xx_configure_pciex_PIMs(struct ppc4xx_pciex_port *port, + struct pci_controller *hose, + void __iomem *mbase, + struct resource *res) +{ + resource_size_t size = resource_size(res); + u64 sa; + + if (port->endpoint) { + resource_size_t ep_addr = 0; + resource_size_t ep_size = 32 << 20; + + /* Currently we map a fixed 64MByte window to PLB address + * 0 (SDRAM). This should probably be configurable via a dts + * property. + */ + + /* Calculate window size */ + sa = (0xffffffffffffffffull << ilog2(ep_size)); + + /* Setup BAR0 */ + out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(sa)); + out_le32(mbase + PECFG_BAR0LMPA, RES_TO_U32_LOW(sa) | + PCI_BASE_ADDRESS_MEM_TYPE_64); + + /* Disable BAR1 & BAR2 */ + out_le32(mbase + PECFG_BAR1MPA, 0); + out_le32(mbase + PECFG_BAR2HMPA, 0); + out_le32(mbase + PECFG_BAR2LMPA, 0); + + out_le32(mbase + PECFG_PIM01SAH, RES_TO_U32_HIGH(sa)); + out_le32(mbase + PECFG_PIM01SAL, RES_TO_U32_LOW(sa)); + + out_le32(mbase + PCI_BASE_ADDRESS_0, RES_TO_U32_LOW(ep_addr)); + out_le32(mbase + PCI_BASE_ADDRESS_1, RES_TO_U32_HIGH(ep_addr)); + } else { + /* Calculate window size */ + sa = (0xffffffffffffffffull << ilog2(size)); + if (res->flags & IORESOURCE_PREFETCH) + sa |= PCI_BASE_ADDRESS_MEM_PREFETCH; + + if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx") || + of_device_is_compatible( + port->node, "ibm,plb-pciex-476fpe") || + of_device_is_compatible( + port->node, "ibm,plb-pciex-476gtr")) + sa |= PCI_BASE_ADDRESS_MEM_TYPE_64; + + out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(sa)); + out_le32(mbase + PECFG_BAR0LMPA, RES_TO_U32_LOW(sa)); + + /* The setup of the split looks weird to me ... let's see + * if it works + */ + out_le32(mbase + PECFG_PIM0LAL, 0x00000000); + out_le32(mbase + PECFG_PIM0LAH, 0x00000000); + out_le32(mbase + PECFG_PIM1LAL, 0x00000000); + out_le32(mbase + PECFG_PIM1LAH, 0x00000000); + out_le32(mbase + PECFG_PIM01SAH, 0xffff0000); + out_le32(mbase + PECFG_PIM01SAL, 0x00000000); + + out_le32(mbase + PCI_BASE_ADDRESS_0, RES_TO_U32_LOW(res->start)); + out_le32(mbase + PCI_BASE_ADDRESS_1, RES_TO_U32_HIGH(res->start)); + } + + /* Enable inbound mapping */ + out_le32(mbase + PECFG_PIMEN, 0x1); + + /* Enable I/O, Mem, and Busmaster cycles */ + out_le16(mbase + PCI_COMMAND, + in_le16(mbase + PCI_COMMAND) | + PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); +} + +static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port) +{ + struct resource dma_window; + struct pci_controller *hose = NULL; + const int *bus_range; + int primary, busses; + void __iomem *mbase = NULL, *cfg_data = NULL; + const u32 *pval; + u32 val; + + /* Check if primary bridge */ + primary = of_property_read_bool(port->node, "primary"); + + /* Get bus range if any */ + bus_range = of_get_property(port->node, "bus-range", NULL); + + /* Allocate the host controller data structure */ + hose = pcibios_alloc_controller(port->node); + if (!hose) + goto fail; + + /* We stick the port number in "indirect_type" so the config space + * ops can retrieve the port data structure easily + */ + hose->indirect_type = port->index; + + /* Get bus range */ + hose->first_busno = bus_range ? bus_range[0] : 0x0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + /* Because of how big mapping the config space is (1M per bus), we + * limit how many busses we support. In the long run, we could replace + * that with something akin to kmap_atomic instead. We set aside 1 bus + * for the host itself too. + */ + busses = hose->last_busno - hose->first_busno; /* This is off by 1 */ + if (busses > MAX_PCIE_BUS_MAPPED) { + busses = MAX_PCIE_BUS_MAPPED; + hose->last_busno = hose->first_busno + busses; + } + + if (!port->endpoint) { + /* Only map the external config space in cfg_data for + * PCIe root-complexes. External space is 1M per bus + */ + cfg_data = ioremap(port->cfg_space.start + + (hose->first_busno + 1) * 0x100000, + busses * 0x100000); + if (cfg_data == NULL) { + printk(KERN_ERR "%pOF: Can't map external config space !", + port->node); + goto fail; + } + hose->cfg_data = cfg_data; + } + + /* Always map the host config space in cfg_addr. + * Internal space is 4K + */ + mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000); + if (mbase == NULL) { + printk(KERN_ERR "%pOF: Can't map internal config space !", + port->node); + goto fail; + } + hose->cfg_addr = mbase; + + pr_debug("PCIE %pOF, bus %d..%d\n", port->node, + hose->first_busno, hose->last_busno); + pr_debug(" config space mapped at: root @0x%p, other @0x%p\n", + hose->cfg_addr, hose->cfg_data); + + /* Setup config space */ + hose->ops = &ppc4xx_pciex_pci_ops; + port->hose = hose; + mbase = (void __iomem *)hose->cfg_addr; + + if (!port->endpoint) { + /* + * Set bus numbers on our root port + */ + out_8(mbase + PCI_PRIMARY_BUS, hose->first_busno); + out_8(mbase + PCI_SECONDARY_BUS, hose->first_busno + 1); + out_8(mbase + PCI_SUBORDINATE_BUS, hose->last_busno); + } + + /* + * OMRs are already reset, also disable PIMs + */ + out_le32(mbase + PECFG_PIMEN, 0); + + /* Parse outbound mapping resources */ + pci_process_bridge_OF_ranges(hose, port->node, primary); + + /* Parse inbound mapping resources */ + if (ppc4xx_parse_dma_ranges(hose, mbase, &dma_window) != 0) + goto fail; + + /* Configure outbound ranges POMs */ + ppc4xx_configure_pciex_POMs(port, hose, mbase); + + /* Configure inbound ranges PIMs */ + ppc4xx_configure_pciex_PIMs(port, hose, mbase, &dma_window); + + /* The root complex doesn't show up if we don't set some vendor + * and device IDs into it. The defaults below are the same bogus + * one that the initial code in arch/ppc had. This can be + * overwritten by setting the "vendor-id/device-id" properties + * in the pciex node. + */ + + /* Get the (optional) vendor-/device-id from the device-tree */ + pval = of_get_property(port->node, "vendor-id", NULL); + if (pval) { + val = *pval; + } else { + if (!port->endpoint) + val = 0xaaa0 + port->index; + else + val = 0xeee0 + port->index; + } + out_le16(mbase + 0x200, val); + + pval = of_get_property(port->node, "device-id", NULL); + if (pval) { + val = *pval; + } else { + if (!port->endpoint) + val = 0xbed0 + port->index; + else + val = 0xfed0 + port->index; + } + out_le16(mbase + 0x202, val); + + /* Enable Bus master, memory, and io space */ + if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx")) + out_le16(mbase + 0x204, 0x7); + + if (!port->endpoint) { + /* Set Class Code to PCI-PCI bridge and Revision Id to 1 */ + out_le32(mbase + 0x208, 0x06040001); + + printk(KERN_INFO "PCIE%d: successfully set as root-complex\n", + port->index); + } else { + /* Set Class Code to Processor/PPC */ + out_le32(mbase + 0x208, 0x0b200001); + + printk(KERN_INFO "PCIE%d: successfully set as endpoint\n", + port->index); + } + + return; + fail: + if (hose) + pcibios_free_controller(hose); + if (cfg_data) + iounmap(cfg_data); + if (mbase) + iounmap(mbase); +} + +static void __init ppc4xx_probe_pciex_bridge(struct device_node *np) +{ + struct ppc4xx_pciex_port *port; + const u32 *pval; + int portno; + unsigned int dcrs; + + /* First, proceed to core initialization as we assume there's + * only one PCIe core in the system + */ + if (ppc4xx_pciex_check_core_init(np)) + return; + + /* Get the port number from the device-tree */ + pval = of_get_property(np, "port", NULL); + if (pval == NULL) { + printk(KERN_ERR "PCIE: Can't find port number for %pOF\n", np); + return; + } + portno = *pval; + if (portno >= ppc4xx_pciex_port_count) { + printk(KERN_ERR "PCIE: port number out of range for %pOF\n", + np); + return; + } + port = &ppc4xx_pciex_ports[portno]; + port->index = portno; + + /* + * Check if device is enabled + */ + if (!of_device_is_available(np)) { + printk(KERN_INFO "PCIE%d: Port disabled via device-tree\n", port->index); + return; + } + + port->node = of_node_get(np); + if (ppc4xx_pciex_hwops->want_sdr) { + pval = of_get_property(np, "sdr-base", NULL); + if (pval == NULL) { + printk(KERN_ERR "PCIE: missing sdr-base for %pOF\n", + np); + return; + } + port->sdr_base = *pval; + } + + /* Check if device_type property is set to "pci" or "pci-endpoint". + * Resulting from this setup this PCIe port will be configured + * as root-complex or as endpoint. + */ + if (of_node_is_type(port->node, "pci-endpoint")) { + port->endpoint = 1; + } else if (of_node_is_type(port->node, "pci")) { + port->endpoint = 0; + } else { + printk(KERN_ERR "PCIE: missing or incorrect device_type for %pOF\n", + np); + return; + } + + /* Fetch config space registers address */ + if (of_address_to_resource(np, 0, &port->cfg_space)) { + printk(KERN_ERR "%pOF: Can't get PCI-E config space !", np); + return; + } + /* Fetch host bridge internal registers address */ + if (of_address_to_resource(np, 1, &port->utl_regs)) { + printk(KERN_ERR "%pOF: Can't get UTL register base !", np); + return; + } + + /* Map DCRs */ + dcrs = dcr_resource_start(np, 0); + if (dcrs == 0) { + printk(KERN_ERR "%pOF: Can't get DCR register base !", np); + return; + } + port->dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0)); + + /* Initialize the port specific registers */ + if (ppc4xx_pciex_port_init(port)) { + printk(KERN_WARNING "PCIE%d: Port init failed\n", port->index); + return; + } + + /* Setup the linux hose data structure */ + ppc4xx_pciex_port_setup_hose(port); +} + +#endif /* CONFIG_PPC4xx_PCI_EXPRESS */ + +static int __init ppc4xx_pci_find_bridges(void) +{ + struct device_node *np; + + pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0); + +#ifdef CONFIG_PPC4xx_PCI_EXPRESS + for_each_compatible_node(np, NULL, "ibm,plb-pciex") + ppc4xx_probe_pciex_bridge(np); +#endif + for_each_compatible_node(np, NULL, "ibm,plb-pcix") + ppc4xx_probe_pcix_bridge(np); + for_each_compatible_node(np, NULL, "ibm,plb-pci") + ppc4xx_probe_pci_bridge(np); + + return 0; +} +arch_initcall(ppc4xx_pci_find_bridges); + diff --git a/arch/powerpc/platforms/4xx/pci.h b/arch/powerpc/platforms/4xx/pci.h new file mode 100644 index 0000000000..bb4821938a --- /dev/null +++ b/arch/powerpc/platforms/4xx/pci.h @@ -0,0 +1,505 @@ +/* + * PCI / PCI-X / PCI-Express support for 4xx parts + * + * Copyright 2007 Ben. Herrenschmidt , IBM Corp. + * + * Bits and pieces extracted from arch/ppc support by + * + * Matt Porter + * + * Copyright 2002-2005 MontaVista Software Inc. + */ +#ifndef __PPC4XX_PCI_H__ +#define __PPC4XX_PCI_H__ + +/* + * 4xx PCI-X bridge register definitions + */ +#define PCIX0_VENDID 0x000 +#define PCIX0_DEVID 0x002 +#define PCIX0_COMMAND 0x004 +#define PCIX0_STATUS 0x006 +#define PCIX0_REVID 0x008 +#define PCIX0_CLS 0x009 +#define PCIX0_CACHELS 0x00c +#define PCIX0_LATTIM 0x00d +#define PCIX0_HDTYPE 0x00e +#define PCIX0_BIST 0x00f +#define PCIX0_BAR0L 0x010 +#define PCIX0_BAR0H 0x014 +#define PCIX0_BAR1 0x018 +#define PCIX0_BAR2L 0x01c +#define PCIX0_BAR2H 0x020 +#define PCIX0_BAR3 0x024 +#define PCIX0_CISPTR 0x028 +#define PCIX0_SBSYSVID 0x02c +#define PCIX0_SBSYSID 0x02e +#define PCIX0_EROMBA 0x030 +#define PCIX0_CAP 0x034 +#define PCIX0_RES0 0x035 +#define PCIX0_RES1 0x036 +#define PCIX0_RES2 0x038 +#define PCIX0_INTLN 0x03c +#define PCIX0_INTPN 0x03d +#define PCIX0_MINGNT 0x03e +#define PCIX0_MAXLTNCY 0x03f +#define PCIX0_BRDGOPT1 0x040 +#define PCIX0_BRDGOPT2 0x044 +#define PCIX0_ERREN 0x050 +#define PCIX0_ERRSTS 0x054 +#define PCIX0_PLBBESR 0x058 +#define PCIX0_PLBBEARL 0x05c +#define PCIX0_PLBBEARH 0x060 +#define PCIX0_POM0LAL 0x068 +#define PCIX0_POM0LAH 0x06c +#define PCIX0_POM0SA 0x070 +#define PCIX0_POM0PCIAL 0x074 +#define PCIX0_POM0PCIAH 0x078 +#define PCIX0_POM1LAL 0x07c +#define PCIX0_POM1LAH 0x080 +#define PCIX0_POM1SA 0x084 +#define PCIX0_POM1PCIAL 0x088 +#define PCIX0_POM1PCIAH 0x08c +#define PCIX0_POM2SA 0x090 +#define PCIX0_PIM0SAL 0x098 +#define PCIX0_PIM0SA PCIX0_PIM0SAL +#define PCIX0_PIM0LAL 0x09c +#define PCIX0_PIM0LAH 0x0a0 +#define PCIX0_PIM1SA 0x0a4 +#define PCIX0_PIM1LAL 0x0a8 +#define PCIX0_PIM1LAH 0x0ac +#define PCIX0_PIM2SAL 0x0b0 +#define PCIX0_PIM2SA PCIX0_PIM2SAL +#define PCIX0_PIM2LAL 0x0b4 +#define PCIX0_PIM2LAH 0x0b8 +#define PCIX0_OMCAPID 0x0c0 +#define PCIX0_OMNIPTR 0x0c1 +#define PCIX0_OMMC 0x0c2 +#define PCIX0_OMMA 0x0c4 +#define PCIX0_OMMUA 0x0c8 +#define PCIX0_OMMDATA 0x0cc +#define PCIX0_OMMEOI 0x0ce +#define PCIX0_PMCAPID 0x0d0 +#define PCIX0_PMNIPTR 0x0d1 +#define PCIX0_PMC 0x0d2 +#define PCIX0_PMCSR 0x0d4 +#define PCIX0_PMCSRBSE 0x0d6 +#define PCIX0_PMDATA 0x0d7 +#define PCIX0_PMSCRR 0x0d8 +#define PCIX0_CAPID 0x0dc +#define PCIX0_NIPTR 0x0dd +#define PCIX0_CMD 0x0de +#define PCIX0_STS 0x0e0 +#define PCIX0_IDR 0x0e4 +#define PCIX0_CID 0x0e8 +#define PCIX0_RID 0x0ec +#define PCIX0_PIM0SAH 0x0f8 +#define PCIX0_PIM2SAH 0x0fc +#define PCIX0_MSGIL 0x100 +#define PCIX0_MSGIH 0x104 +#define PCIX0_MSGOL 0x108 +#define PCIX0_MSGOH 0x10c +#define PCIX0_IM 0x1f8 + +/* + * 4xx PCI bridge register definitions + */ +#define PCIL0_PMM0LA 0x00 +#define PCIL0_PMM0MA 0x04 +#define PCIL0_PMM0PCILA 0x08 +#define PCIL0_PMM0PCIHA 0x0c +#define PCIL0_PMM1LA 0x10 +#define PCIL0_PMM1MA 0x14 +#define PCIL0_PMM1PCILA 0x18 +#define PCIL0_PMM1PCIHA 0x1c +#define PCIL0_PMM2LA 0x20 +#define PCIL0_PMM2MA 0x24 +#define PCIL0_PMM2PCILA 0x28 +#define PCIL0_PMM2PCIHA 0x2c +#define PCIL0_PTM1MS 0x30 +#define PCIL0_PTM1LA 0x34 +#define PCIL0_PTM2MS 0x38 +#define PCIL0_PTM2LA 0x3c + +/* + * 4xx PCIe bridge register definitions + */ + +/* DCR offsets */ +#define DCRO_PEGPL_CFGBAH 0x00 +#define DCRO_PEGPL_CFGBAL 0x01 +#define DCRO_PEGPL_CFGMSK 0x02 +#define DCRO_PEGPL_MSGBAH 0x03 +#define DCRO_PEGPL_MSGBAL 0x04 +#define DCRO_PEGPL_MSGMSK 0x05 +#define DCRO_PEGPL_OMR1BAH 0x06 +#define DCRO_PEGPL_OMR1BAL 0x07 +#define DCRO_PEGPL_OMR1MSKH 0x08 +#define DCRO_PEGPL_OMR1MSKL 0x09 +#define DCRO_PEGPL_OMR2BAH 0x0a +#define DCRO_PEGPL_OMR2BAL 0x0b +#define DCRO_PEGPL_OMR2MSKH 0x0c +#define DCRO_PEGPL_OMR2MSKL 0x0d +#define DCRO_PEGPL_OMR3BAH 0x0e +#define DCRO_PEGPL_OMR3BAL 0x0f +#define DCRO_PEGPL_OMR3MSKH 0x10 +#define DCRO_PEGPL_OMR3MSKL 0x11 +#define DCRO_PEGPL_REGBAH 0x12 +#define DCRO_PEGPL_REGBAL 0x13 +#define DCRO_PEGPL_REGMSK 0x14 +#define DCRO_PEGPL_SPECIAL 0x15 +#define DCRO_PEGPL_CFG 0x16 +#define DCRO_PEGPL_ESR 0x17 +#define DCRO_PEGPL_EARH 0x18 +#define DCRO_PEGPL_EARL 0x19 +#define DCRO_PEGPL_EATR 0x1a + +/* DMER mask */ +#define GPL_DMER_MASK_DISA 0x02000000 + +/* + * System DCRs (SDRs) + */ +#define PESDR0_PLLLCT1 0x03a0 +#define PESDR0_PLLLCT2 0x03a1 +#define PESDR0_PLLLCT3 0x03a2 + +/* + * 440SPe additional DCRs + */ +#define PESDR0_440SPE_UTLSET1 0x0300 +#define PESDR0_440SPE_UTLSET2 0x0301 +#define PESDR0_440SPE_DLPSET 0x0302 +#define PESDR0_440SPE_LOOP 0x0303 +#define PESDR0_440SPE_RCSSET 0x0304 +#define PESDR0_440SPE_RCSSTS 0x0305 +#define PESDR0_440SPE_HSSL0SET1 0x0306 +#define PESDR0_440SPE_HSSL0SET2 0x0307 +#define PESDR0_440SPE_HSSL0STS 0x0308 +#define PESDR0_440SPE_HSSL1SET1 0x0309 +#define PESDR0_440SPE_HSSL1SET2 0x030a +#define PESDR0_440SPE_HSSL1STS 0x030b +#define PESDR0_440SPE_HSSL2SET1 0x030c +#define PESDR0_440SPE_HSSL2SET2 0x030d +#define PESDR0_440SPE_HSSL2STS 0x030e +#define PESDR0_440SPE_HSSL3SET1 0x030f +#define PESDR0_440SPE_HSSL3SET2 0x0310 +#define PESDR0_440SPE_HSSL3STS 0x0311 +#define PESDR0_440SPE_HSSL4SET1 0x0312 +#define PESDR0_440SPE_HSSL4SET2 0x0313 +#define PESDR0_440SPE_HSSL4STS 0x0314 +#define PESDR0_440SPE_HSSL5SET1 0x0315 +#define PESDR0_440SPE_HSSL5SET2 0x0316 +#define PESDR0_440SPE_HSSL5STS 0x0317 +#define PESDR0_440SPE_HSSL6SET1 0x0318 +#define PESDR0_440SPE_HSSL6SET2 0x0319 +#define PESDR0_440SPE_HSSL6STS 0x031a +#define PESDR0_440SPE_HSSL7SET1 0x031b +#define PESDR0_440SPE_HSSL7SET2 0x031c +#define PESDR0_440SPE_HSSL7STS 0x031d +#define PESDR0_440SPE_HSSCTLSET 0x031e +#define PESDR0_440SPE_LANE_ABCD 0x031f +#define PESDR0_440SPE_LANE_EFGH 0x0320 + +#define PESDR1_440SPE_UTLSET1 0x0340 +#define PESDR1_440SPE_UTLSET2 0x0341 +#define PESDR1_440SPE_DLPSET 0x0342 +#define PESDR1_440SPE_LOOP 0x0343 +#define PESDR1_440SPE_RCSSET 0x0344 +#define PESDR1_440SPE_RCSSTS 0x0345 +#define PESDR1_440SPE_HSSL0SET1 0x0346 +#define PESDR1_440SPE_HSSL0SET2 0x0347 +#define PESDR1_440SPE_HSSL0STS 0x0348 +#define PESDR1_440SPE_HSSL1SET1 0x0349 +#define PESDR1_440SPE_HSSL1SET2 0x034a +#define PESDR1_440SPE_HSSL1STS 0x034b +#define PESDR1_440SPE_HSSL2SET1 0x034c +#define PESDR1_440SPE_HSSL2SET2 0x034d +#define PESDR1_440SPE_HSSL2STS 0x034e +#define PESDR1_440SPE_HSSL3SET1 0x034f +#define PESDR1_440SPE_HSSL3SET2 0x0350 +#define PESDR1_440SPE_HSSL3STS 0x0351 +#define PESDR1_440SPE_HSSCTLSET 0x0352 +#define PESDR1_440SPE_LANE_ABCD 0x0353 + +#define PESDR2_440SPE_UTLSET1 0x0370 +#define PESDR2_440SPE_UTLSET2 0x0371 +#define PESDR2_440SPE_DLPSET 0x0372 +#define PESDR2_440SPE_LOOP 0x0373 +#define PESDR2_440SPE_RCSSET 0x0374 +#define PESDR2_440SPE_RCSSTS 0x0375 +#define PESDR2_440SPE_HSSL0SET1 0x0376 +#define PESDR2_440SPE_HSSL0SET2 0x0377 +#define PESDR2_440SPE_HSSL0STS 0x0378 +#define PESDR2_440SPE_HSSL1SET1 0x0379 +#define PESDR2_440SPE_HSSL1SET2 0x037a +#define PESDR2_440SPE_HSSL1STS 0x037b +#define PESDR2_440SPE_HSSL2SET1 0x037c +#define PESDR2_440SPE_HSSL2SET2 0x037d +#define PESDR2_440SPE_HSSL2STS 0x037e +#define PESDR2_440SPE_HSSL3SET1 0x037f +#define PESDR2_440SPE_HSSL3SET2 0x0380 +#define PESDR2_440SPE_HSSL3STS 0x0381 +#define PESDR2_440SPE_HSSCTLSET 0x0382 +#define PESDR2_440SPE_LANE_ABCD 0x0383 + +/* + * 405EX additional DCRs + */ +#define PESDR0_405EX_UTLSET1 0x0400 +#define PESDR0_405EX_UTLSET2 0x0401 +#define PESDR0_405EX_DLPSET 0x0402 +#define PESDR0_405EX_LOOP 0x0403 +#define PESDR0_405EX_RCSSET 0x0404 +#define PESDR0_405EX_RCSSTS 0x0405 +#define PESDR0_405EX_PHYSET1 0x0406 +#define PESDR0_405EX_PHYSET2 0x0407 +#define PESDR0_405EX_BIST 0x0408 +#define PESDR0_405EX_LPB 0x040B +#define PESDR0_405EX_PHYSTA 0x040C + +#define PESDR1_405EX_UTLSET1 0x0440 +#define PESDR1_405EX_UTLSET2 0x0441 +#define PESDR1_405EX_DLPSET 0x0442 +#define PESDR1_405EX_LOOP 0x0443 +#define PESDR1_405EX_RCSSET 0x0444 +#define PESDR1_405EX_RCSSTS 0x0445 +#define PESDR1_405EX_PHYSET1 0x0446 +#define PESDR1_405EX_PHYSET2 0x0447 +#define PESDR1_405EX_BIST 0x0448 +#define PESDR1_405EX_LPB 0x044B +#define PESDR1_405EX_PHYSTA 0x044C + +/* + * 460EX additional DCRs + */ +#define PESDR0_460EX_L0BIST 0x0308 +#define PESDR0_460EX_L0BISTSTS 0x0309 +#define PESDR0_460EX_L0CDRCTL 0x030A +#define PESDR0_460EX_L0DRV 0x030B +#define PESDR0_460EX_L0REC 0x030C +#define PESDR0_460EX_L0LPB 0x030D +#define PESDR0_460EX_L0CLK 0x030E +#define PESDR0_460EX_PHY_CTL_RST 0x030F +#define PESDR0_460EX_RSTSTA 0x0310 +#define PESDR0_460EX_OBS 0x0311 +#define PESDR0_460EX_L0ERRC 0x0320 + +#define PESDR1_460EX_L0BIST 0x0348 +#define PESDR1_460EX_L1BIST 0x0349 +#define PESDR1_460EX_L2BIST 0x034A +#define PESDR1_460EX_L3BIST 0x034B +#define PESDR1_460EX_L0BISTSTS 0x034C +#define PESDR1_460EX_L1BISTSTS 0x034D +#define PESDR1_460EX_L2BISTSTS 0x034E +#define PESDR1_460EX_L3BISTSTS 0x034F +#define PESDR1_460EX_L0CDRCTL 0x0350 +#define PESDR1_460EX_L1CDRCTL 0x0351 +#define PESDR1_460EX_L2CDRCTL 0x0352 +#define PESDR1_460EX_L3CDRCTL 0x0353 +#define PESDR1_460EX_L0DRV 0x0354 +#define PESDR1_460EX_L1DRV 0x0355 +#define PESDR1_460EX_L2DRV 0x0356 +#define PESDR1_460EX_L3DRV 0x0357 +#define PESDR1_460EX_L0REC 0x0358 +#define PESDR1_460EX_L1REC 0x0359 +#define PESDR1_460EX_L2REC 0x035A +#define PESDR1_460EX_L3REC 0x035B +#define PESDR1_460EX_L0LPB 0x035C +#define PESDR1_460EX_L1LPB 0x035D +#define PESDR1_460EX_L2LPB 0x035E +#define PESDR1_460EX_L3LPB 0x035F +#define PESDR1_460EX_L0CLK 0x0360 +#define PESDR1_460EX_L1CLK 0x0361 +#define PESDR1_460EX_L2CLK 0x0362 +#define PESDR1_460EX_L3CLK 0x0363 +#define PESDR1_460EX_PHY_CTL_RST 0x0364 +#define PESDR1_460EX_RSTSTA 0x0365 +#define PESDR1_460EX_OBS 0x0366 +#define PESDR1_460EX_L0ERRC 0x0368 +#define PESDR1_460EX_L1ERRC 0x0369 +#define PESDR1_460EX_L2ERRC 0x036A +#define PESDR1_460EX_L3ERRC 0x036B +#define PESDR0_460EX_IHS1 0x036C +#define PESDR0_460EX_IHS2 0x036D + +/* + * 460SX additional DCRs + */ +#define PESDRn_460SX_RCEI 0x02 + +#define PESDR0_460SX_HSSL0DAMP 0x320 +#define PESDR0_460SX_HSSL1DAMP 0x321 +#define PESDR0_460SX_HSSL2DAMP 0x322 +#define PESDR0_460SX_HSSL3DAMP 0x323 +#define PESDR0_460SX_HSSL4DAMP 0x324 +#define PESDR0_460SX_HSSL5DAMP 0x325 +#define PESDR0_460SX_HSSL6DAMP 0x326 +#define PESDR0_460SX_HSSL7DAMP 0x327 + +#define PESDR1_460SX_HSSL0DAMP 0x354 +#define PESDR1_460SX_HSSL1DAMP 0x355 +#define PESDR1_460SX_HSSL2DAMP 0x356 +#define PESDR1_460SX_HSSL3DAMP 0x357 + +#define PESDR2_460SX_HSSL0DAMP 0x384 +#define PESDR2_460SX_HSSL1DAMP 0x385 +#define PESDR2_460SX_HSSL2DAMP 0x386 +#define PESDR2_460SX_HSSL3DAMP 0x387 + +#define PESDR0_460SX_HSSL0COEFA 0x328 +#define PESDR0_460SX_HSSL1COEFA 0x329 +#define PESDR0_460SX_HSSL2COEFA 0x32A +#define PESDR0_460SX_HSSL3COEFA 0x32B +#define PESDR0_460SX_HSSL4COEFA 0x32C +#define PESDR0_460SX_HSSL5COEFA 0x32D +#define PESDR0_460SX_HSSL6COEFA 0x32E +#define PESDR0_460SX_HSSL7COEFA 0x32F + +#define PESDR1_460SX_HSSL0COEFA 0x358 +#define PESDR1_460SX_HSSL1COEFA 0x359 +#define PESDR1_460SX_HSSL2COEFA 0x35A +#define PESDR1_460SX_HSSL3COEFA 0x35B + +#define PESDR2_460SX_HSSL0COEFA 0x388 +#define PESDR2_460SX_HSSL1COEFA 0x389 +#define PESDR2_460SX_HSSL2COEFA 0x38A +#define PESDR2_460SX_HSSL3COEFA 0x38B + +#define PESDR0_460SX_HSSL1CALDRV 0x339 +#define PESDR1_460SX_HSSL1CALDRV 0x361 +#define PESDR2_460SX_HSSL1CALDRV 0x391 + +#define PESDR0_460SX_HSSSLEW 0x338 +#define PESDR1_460SX_HSSSLEW 0x360 +#define PESDR2_460SX_HSSSLEW 0x390 + +#define PESDR0_460SX_HSSCTLSET 0x31E +#define PESDR1_460SX_HSSCTLSET 0x352 +#define PESDR2_460SX_HSSCTLSET 0x382 + +#define PESDR0_460SX_RCSSET 0x304 +#define PESDR1_460SX_RCSSET 0x344 +#define PESDR2_460SX_RCSSET 0x374 +/* + * Of the above, some are common offsets from the base + */ +#define PESDRn_UTLSET1 0x00 +#define PESDRn_UTLSET2 0x01 +#define PESDRn_DLPSET 0x02 +#define PESDRn_LOOP 0x03 +#define PESDRn_RCSSET 0x04 +#define PESDRn_RCSSTS 0x05 + +/* 440spe only */ +#define PESDRn_440SPE_HSSL0SET1 0x06 +#define PESDRn_440SPE_HSSL0SET2 0x07 +#define PESDRn_440SPE_HSSL0STS 0x08 +#define PESDRn_440SPE_HSSL1SET1 0x09 +#define PESDRn_440SPE_HSSL1SET2 0x0a +#define PESDRn_440SPE_HSSL1STS 0x0b +#define PESDRn_440SPE_HSSL2SET1 0x0c +#define PESDRn_440SPE_HSSL2SET2 0x0d +#define PESDRn_440SPE_HSSL2STS 0x0e +#define PESDRn_440SPE_HSSL3SET1 0x0f +#define PESDRn_440SPE_HSSL3SET2 0x10 +#define PESDRn_440SPE_HSSL3STS 0x11 + +/* 440spe port 0 only */ +#define PESDRn_440SPE_HSSL4SET1 0x12 +#define PESDRn_440SPE_HSSL4SET2 0x13 +#define PESDRn_440SPE_HSSL4STS 0x14 +#define PESDRn_440SPE_HSSL5SET1 0x15 +#define PESDRn_440SPE_HSSL5SET2 0x16 +#define PESDRn_440SPE_HSSL5STS 0x17 +#define PESDRn_440SPE_HSSL6SET1 0x18 +#define PESDRn_440SPE_HSSL6SET2 0x19 +#define PESDRn_440SPE_HSSL6STS 0x1a +#define PESDRn_440SPE_HSSL7SET1 0x1b +#define PESDRn_440SPE_HSSL7SET2 0x1c +#define PESDRn_440SPE_HSSL7STS 0x1d + +/* 405ex only */ +#define PESDRn_405EX_PHYSET1 0x06 +#define PESDRn_405EX_PHYSET2 0x07 +#define PESDRn_405EX_PHYSTA 0x0c + +/* + * UTL register offsets + */ +#define PEUTL_PBCTL 0x00 +#define PEUTL_PBBSZ 0x20 +#define PEUTL_OPDBSZ 0x68 +#define PEUTL_IPHBSZ 0x70 +#define PEUTL_IPDBSZ 0x78 +#define PEUTL_OUTTR 0x90 +#define PEUTL_INTR 0x98 +#define PEUTL_PCTL 0xa0 +#define PEUTL_RCSTA 0xB0 +#define PEUTL_RCIRQEN 0xb8 + +/* + * Config space register offsets + */ +#define PECFG_ECRTCTL 0x074 + +#define PECFG_BAR0LMPA 0x210 +#define PECFG_BAR0HMPA 0x214 +#define PECFG_BAR1MPA 0x218 +#define PECFG_BAR2LMPA 0x220 +#define PECFG_BAR2HMPA 0x224 + +#define PECFG_PIMEN 0x33c +#define PECFG_PIM0LAL 0x340 +#define PECFG_PIM0LAH 0x344 +#define PECFG_PIM1LAL 0x348 +#define PECFG_PIM1LAH 0x34c +#define PECFG_PIM01SAL 0x350 +#define PECFG_PIM01SAH 0x354 + +#define PECFG_POM0LAL 0x380 +#define PECFG_POM0LAH 0x384 +#define PECFG_POM1LAL 0x388 +#define PECFG_POM1LAH 0x38c +#define PECFG_POM2LAL 0x390 +#define PECFG_POM2LAH 0x394 + +/* 460sx only */ +#define PECFG_460SX_DLLSTA 0x3f8 + +/* 460sx Bit Mappings */ +#define PECFG_460SX_DLLSTA_LINKUP 0x00000010 +#define DCRO_PEGPL_460SX_OMR1MSKL_UOT 0x00000004 + +/* PEGPL Bit Mappings */ +#define DCRO_PEGPL_OMRxMSKL_VAL 0x00000001 +#define DCRO_PEGPL_OMR1MSKL_UOT 0x00000002 +#define DCRO_PEGPL_OMR3MSKL_IO 0x00000002 + +/* 476FPE */ +#define PCCFG_LCPA 0x270 +#define PECFG_TLDLP 0x3F8 +#define PECFG_TLDLP_LNKUP 0x00000008 +#define PECFG_TLDLP_PRESENT 0x00000010 +#define DCRO_PEGPL_476FPE_OMR1MSKL_UOT 0x00000004 + +/* SDR Bit Mappings */ +#define PESDRx_RCSSET_HLDPLB 0x10000000 +#define PESDRx_RCSSET_RSTGU 0x01000000 +#define PESDRx_RCSSET_RDY 0x00100000 +#define PESDRx_RCSSET_RSTDL 0x00010000 +#define PESDRx_RCSSET_RSTPYN 0x00001000 + +enum +{ + PTYPE_ENDPOINT = 0x0, + PTYPE_LEGACY_ENDPOINT = 0x1, + PTYPE_ROOT_PORT = 0x4, + + LNKW_X1 = 0x1, + LNKW_X4 = 0x4, + LNKW_X8 = 0x8 +}; + + +#endif /* __PPC4XX_PCI_H__ */ diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c new file mode 100644 index 0000000000..b2d940437a --- /dev/null +++ b/arch/powerpc/platforms/4xx/soc.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IBM/AMCC PPC4xx SoC setup code + * + * Copyright 2008 DENX Software Engineering, Stefan Roese + * + * L2 cache routines cloned from arch/ppc/syslib/ibm440gx_common.c which is: + * Eugene Surovegin or + * Copyright (c) 2003 - 2006 Zultys Technologies + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static u32 dcrbase_l2c; + +/* + * L2-cache + */ + +/* Issue L2C diagnostic command */ +static inline u32 l2c_diag(u32 addr) +{ + mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, addr); + mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_DIAG); + while (!(mfdcr(dcrbase_l2c + DCRN_L2C0_SR) & L2C_SR_CC)) + ; + + return mfdcr(dcrbase_l2c + DCRN_L2C0_DATA); +} + +static irqreturn_t l2c_error_handler(int irq, void *dev) +{ + u32 sr = mfdcr(dcrbase_l2c + DCRN_L2C0_SR); + + if (sr & L2C_SR_CPE) { + /* Read cache trapped address */ + u32 addr = l2c_diag(0x42000000); + printk(KERN_EMERG "L2C: Cache Parity Error, addr[16:26] = 0x%08x\n", + addr); + } + if (sr & L2C_SR_TPE) { + /* Read tag trapped address */ + u32 addr = l2c_diag(0x82000000) >> 16; + printk(KERN_EMERG "L2C: Tag Parity Error, addr[16:26] = 0x%08x\n", + addr); + } + + /* Clear parity errors */ + if (sr & (L2C_SR_CPE | L2C_SR_TPE)){ + mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, 0); + mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_CCP | L2C_CMD_CTE); + } else { + printk(KERN_EMERG "L2C: LRU error\n"); + } + + return IRQ_HANDLED; +} + +static int __init ppc4xx_l2c_probe(void) +{ + struct device_node *np; + u32 r; + unsigned long flags; + int irq; + const u32 *dcrreg; + u32 dcrbase_isram; + int len; + const u32 *prop; + u32 l2_size; + + np = of_find_compatible_node(NULL, NULL, "ibm,l2-cache"); + if (!np) + return 0; + + /* Get l2 cache size */ + prop = of_get_property(np, "cache-size", NULL); + if (prop == NULL) { + printk(KERN_ERR "%pOF: Can't get cache-size!\n", np); + of_node_put(np); + return -ENODEV; + } + l2_size = prop[0]; + + /* Map DCRs */ + dcrreg = of_get_property(np, "dcr-reg", &len); + if (!dcrreg || (len != 4 * sizeof(u32))) { + printk(KERN_ERR "%pOF: Can't get DCR register base !", np); + of_node_put(np); + return -ENODEV; + } + dcrbase_isram = dcrreg[0]; + dcrbase_l2c = dcrreg[2]; + + /* Get and map irq number from device tree */ + irq = irq_of_parse_and_map(np, 0); + if (!irq) { + printk(KERN_ERR "irq_of_parse_and_map failed\n"); + of_node_put(np); + return -ENODEV; + } + + /* Install error handler */ + if (request_irq(irq, l2c_error_handler, 0, "L2C", 0) < 0) { + printk(KERN_ERR "Cannot install L2C error handler" + ", cache is not enabled\n"); + of_node_put(np); + return -ENODEV; + } + + local_irq_save(flags); + asm volatile ("sync" ::: "memory"); + + /* Disable SRAM */ + mtdcr(dcrbase_isram + DCRN_SRAM0_DPC, + mfdcr(dcrbase_isram + DCRN_SRAM0_DPC) & ~SRAM_DPC_ENABLE); + mtdcr(dcrbase_isram + DCRN_SRAM0_SB0CR, + mfdcr(dcrbase_isram + DCRN_SRAM0_SB0CR) & ~SRAM_SBCR_BU_MASK); + mtdcr(dcrbase_isram + DCRN_SRAM0_SB1CR, + mfdcr(dcrbase_isram + DCRN_SRAM0_SB1CR) & ~SRAM_SBCR_BU_MASK); + mtdcr(dcrbase_isram + DCRN_SRAM0_SB2CR, + mfdcr(dcrbase_isram + DCRN_SRAM0_SB2CR) & ~SRAM_SBCR_BU_MASK); + mtdcr(dcrbase_isram + DCRN_SRAM0_SB3CR, + mfdcr(dcrbase_isram + DCRN_SRAM0_SB3CR) & ~SRAM_SBCR_BU_MASK); + + /* Enable L2_MODE without ICU/DCU */ + r = mfdcr(dcrbase_l2c + DCRN_L2C0_CFG) & + ~(L2C_CFG_ICU | L2C_CFG_DCU | L2C_CFG_SS_MASK); + r |= L2C_CFG_L2M | L2C_CFG_SS_256; + mtdcr(dcrbase_l2c + DCRN_L2C0_CFG, r); + + mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, 0); + + /* Hardware Clear Command */ + mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_HCC); + while (!(mfdcr(dcrbase_l2c + DCRN_L2C0_SR) & L2C_SR_CC)) + ; + + /* Clear Cache Parity and Tag Errors */ + mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_CCP | L2C_CMD_CTE); + + /* Enable 64G snoop region starting at 0 */ + r = mfdcr(dcrbase_l2c + DCRN_L2C0_SNP0) & + ~(L2C_SNP_BA_MASK | L2C_SNP_SSR_MASK); + r |= L2C_SNP_SSR_32G | L2C_SNP_ESR; + mtdcr(dcrbase_l2c + DCRN_L2C0_SNP0, r); + + r = mfdcr(dcrbase_l2c + DCRN_L2C0_SNP1) & + ~(L2C_SNP_BA_MASK | L2C_SNP_SSR_MASK); + r |= 0x80000000 | L2C_SNP_SSR_32G | L2C_SNP_ESR; + mtdcr(dcrbase_l2c + DCRN_L2C0_SNP1, r); + + asm volatile ("sync" ::: "memory"); + + /* Enable ICU/DCU ports */ + r = mfdcr(dcrbase_l2c + DCRN_L2C0_CFG); + r &= ~(L2C_CFG_DCW_MASK | L2C_CFG_PMUX_MASK | L2C_CFG_PMIM + | L2C_CFG_TPEI | L2C_CFG_CPEI | L2C_CFG_NAM | L2C_CFG_NBRM); + r |= L2C_CFG_ICU | L2C_CFG_DCU | L2C_CFG_TPC | L2C_CFG_CPC | L2C_CFG_FRAN + | L2C_CFG_CPIM | L2C_CFG_TPIM | L2C_CFG_LIM | L2C_CFG_SMCM; + + /* Check for 460EX/GT special handling */ + if (of_device_is_compatible(np, "ibm,l2-cache-460ex") || + of_device_is_compatible(np, "ibm,l2-cache-460gt")) + r |= L2C_CFG_RDBW; + + mtdcr(dcrbase_l2c + DCRN_L2C0_CFG, r); + + asm volatile ("sync; isync" ::: "memory"); + local_irq_restore(flags); + + printk(KERN_INFO "%dk L2-cache enabled\n", l2_size >> 10); + + of_node_put(np); + return 0; +} +arch_initcall(ppc4xx_l2c_probe); + +/* + * Apply a system reset. Alternatively a board specific value may be + * provided via the "reset-type" property in the cpu node. + */ +void ppc4xx_reset_system(char *cmd) +{ + struct device_node *np; + u32 reset_type = DBCR0_RST_SYSTEM; + const u32 *prop; + + np = of_get_cpu_node(0, NULL); + if (np) { + prop = of_get_property(np, "reset-type", NULL); + + /* + * Check if property exists and if it is in range: + * 1 - PPC4xx core reset + * 2 - PPC4xx chip reset + * 3 - PPC4xx system reset (default) + */ + if ((prop) && ((prop[0] >= 1) && (prop[0] <= 3))) + reset_type = prop[0] << 28; + } + + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | reset_type); + + while (1) + ; /* Just in case the reset doesn't work */ +} diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c new file mode 100644 index 0000000000..e3e148b9dd --- /dev/null +++ b/arch/powerpc/platforms/4xx/uic.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/sysdev/uic.c + * + * IBM PowerPC 4xx Universal Interrupt Controller + * + * Copyright 2007 David Gibson , IBM Corporation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NR_UIC_INTS 32 + +#define UIC_SR 0x0 +#define UIC_ER 0x2 +#define UIC_CR 0x3 +#define UIC_PR 0x4 +#define UIC_TR 0x5 +#define UIC_MSR 0x6 +#define UIC_VR 0x7 +#define UIC_VCR 0x8 + +struct uic *primary_uic; + +struct uic { + int index; + int dcrbase; + + raw_spinlock_t lock; + + /* The remapper for this UIC */ + struct irq_domain *irqhost; +}; + +static void uic_unmask_irq(struct irq_data *d) +{ + struct uic *uic = irq_data_get_irq_chip_data(d); + unsigned int src = irqd_to_hwirq(d); + unsigned long flags; + u32 er, sr; + + sr = 1 << (31-src); + raw_spin_lock_irqsave(&uic->lock, flags); + /* ack level-triggered interrupts here */ + if (irqd_is_level_type(d)) + mtdcr(uic->dcrbase + UIC_SR, sr); + er = mfdcr(uic->dcrbase + UIC_ER); + er |= sr; + mtdcr(uic->dcrbase + UIC_ER, er); + raw_spin_unlock_irqrestore(&uic->lock, flags); +} + +static void uic_mask_irq(struct irq_data *d) +{ + struct uic *uic = irq_data_get_irq_chip_data(d); + unsigned int src = irqd_to_hwirq(d); + unsigned long flags; + u32 er; + + raw_spin_lock_irqsave(&uic->lock, flags); + er = mfdcr(uic->dcrbase + UIC_ER); + er &= ~(1 << (31 - src)); + mtdcr(uic->dcrbase + UIC_ER, er); + raw_spin_unlock_irqrestore(&uic->lock, flags); +} + +static void uic_ack_irq(struct irq_data *d) +{ + struct uic *uic = irq_data_get_irq_chip_data(d); + unsigned int src = irqd_to_hwirq(d); + unsigned long flags; + + raw_spin_lock_irqsave(&uic->lock, flags); + mtdcr(uic->dcrbase + UIC_SR, 1 << (31-src)); + raw_spin_unlock_irqrestore(&uic->lock, flags); +} + +static void uic_mask_ack_irq(struct irq_data *d) +{ + struct uic *uic = irq_data_get_irq_chip_data(d); + unsigned int src = irqd_to_hwirq(d); + unsigned long flags; + u32 er, sr; + + sr = 1 << (31-src); + raw_spin_lock_irqsave(&uic->lock, flags); + er = mfdcr(uic->dcrbase + UIC_ER); + er &= ~sr; + mtdcr(uic->dcrbase + UIC_ER, er); + /* On the UIC, acking (i.e. clearing the SR bit) + * a level irq will have no effect if the interrupt + * is still asserted by the device, even if + * the interrupt is already masked. Therefore + * we only ack the egde interrupts here, while + * level interrupts are ack'ed after the actual + * isr call in the uic_unmask_irq() + */ + if (!irqd_is_level_type(d)) + mtdcr(uic->dcrbase + UIC_SR, sr); + raw_spin_unlock_irqrestore(&uic->lock, flags); +} + +static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) +{ + struct uic *uic = irq_data_get_irq_chip_data(d); + unsigned int src = irqd_to_hwirq(d); + unsigned long flags; + int trigger, polarity; + u32 tr, pr, mask; + + switch (flow_type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_NONE: + uic_mask_irq(d); + return 0; + + case IRQ_TYPE_EDGE_RISING: + trigger = 1; polarity = 1; + break; + case IRQ_TYPE_EDGE_FALLING: + trigger = 1; polarity = 0; + break; + case IRQ_TYPE_LEVEL_HIGH: + trigger = 0; polarity = 1; + break; + case IRQ_TYPE_LEVEL_LOW: + trigger = 0; polarity = 0; + break; + default: + return -EINVAL; + } + + mask = ~(1 << (31 - src)); + + raw_spin_lock_irqsave(&uic->lock, flags); + tr = mfdcr(uic->dcrbase + UIC_TR); + pr = mfdcr(uic->dcrbase + UIC_PR); + tr = (tr & mask) | (trigger << (31-src)); + pr = (pr & mask) | (polarity << (31-src)); + + mtdcr(uic->dcrbase + UIC_PR, pr); + mtdcr(uic->dcrbase + UIC_TR, tr); + mtdcr(uic->dcrbase + UIC_SR, ~mask); + + raw_spin_unlock_irqrestore(&uic->lock, flags); + + return 0; +} + +static struct irq_chip uic_irq_chip = { + .name = "UIC", + .irq_unmask = uic_unmask_irq, + .irq_mask = uic_mask_irq, + .irq_mask_ack = uic_mask_ack_irq, + .irq_ack = uic_ack_irq, + .irq_set_type = uic_set_irq_type, +}; + +static int uic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct uic *uic = h->host_data; + + irq_set_chip_data(virq, uic); + /* Despite the name, handle_level_irq() works for both level + * and edge irqs on UIC. FIXME: check this is correct */ + irq_set_chip_and_handler(virq, &uic_irq_chip, handle_level_irq); + + /* Set default irq type */ + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; +} + +static const struct irq_domain_ops uic_host_ops = { + .map = uic_host_map, + .xlate = irq_domain_xlate_twocell, +}; + +static void uic_irq_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irq_data *idata = irq_desc_get_irq_data(desc); + struct uic *uic = irq_desc_get_handler_data(desc); + u32 msr; + int src; + + raw_spin_lock(&desc->lock); + if (irqd_is_level_type(idata)) + chip->irq_mask(idata); + else + chip->irq_mask_ack(idata); + raw_spin_unlock(&desc->lock); + + msr = mfdcr(uic->dcrbase + UIC_MSR); + if (!msr) /* spurious interrupt */ + goto uic_irq_ret; + + src = 32 - ffs(msr); + + generic_handle_domain_irq(uic->irqhost, src); + +uic_irq_ret: + raw_spin_lock(&desc->lock); + if (irqd_is_level_type(idata)) + chip->irq_ack(idata); + if (!irqd_irq_disabled(idata) && chip->irq_unmask) + chip->irq_unmask(idata); + raw_spin_unlock(&desc->lock); +} + +static struct uic * __init uic_init_one(struct device_node *node) +{ + struct uic *uic; + const u32 *indexp, *dcrreg; + int len; + + BUG_ON(! of_device_is_compatible(node, "ibm,uic")); + + uic = kzalloc(sizeof(*uic), GFP_KERNEL); + if (! uic) + return NULL; /* FIXME: panic? */ + + raw_spin_lock_init(&uic->lock); + indexp = of_get_property(node, "cell-index", &len); + if (!indexp || (len != sizeof(u32))) { + printk(KERN_ERR "uic: Device node %pOF has missing or invalid " + "cell-index property\n", node); + return NULL; + } + uic->index = *indexp; + + dcrreg = of_get_property(node, "dcr-reg", &len); + if (!dcrreg || (len != 2*sizeof(u32))) { + printk(KERN_ERR "uic: Device node %pOF has missing or invalid " + "dcr-reg property\n", node); + return NULL; + } + uic->dcrbase = *dcrreg; + + uic->irqhost = irq_domain_add_linear(node, NR_UIC_INTS, &uic_host_ops, + uic); + if (! uic->irqhost) + return NULL; /* FIXME: panic? */ + + /* Start with all interrupts disabled, level and non-critical */ + mtdcr(uic->dcrbase + UIC_ER, 0); + mtdcr(uic->dcrbase + UIC_CR, 0); + mtdcr(uic->dcrbase + UIC_TR, 0); + /* Clear any pending interrupts, in case the firmware left some */ + mtdcr(uic->dcrbase + UIC_SR, 0xffffffff); + + printk ("UIC%d (%d IRQ sources) at DCR 0x%x\n", uic->index, + NR_UIC_INTS, uic->dcrbase); + + return uic; +} + +void __init uic_init_tree(void) +{ + struct device_node *np; + struct uic *uic; + const u32 *interrupts; + + /* First locate and initialize the top-level UIC */ + for_each_compatible_node(np, NULL, "ibm,uic") { + interrupts = of_get_property(np, "interrupts", NULL); + if (!interrupts) + break; + } + + BUG_ON(!np); /* uic_init_tree() assumes there's a UIC as the + * top-level interrupt controller */ + primary_uic = uic_init_one(np); + if (!primary_uic) + panic("Unable to initialize primary UIC %pOF\n", np); + + irq_set_default_host(primary_uic->irqhost); + of_node_put(np); + + /* The scan again for cascaded UICs */ + for_each_compatible_node(np, NULL, "ibm,uic") { + interrupts = of_get_property(np, "interrupts", NULL); + if (interrupts) { + /* Secondary UIC */ + int cascade_virq; + + uic = uic_init_one(np); + if (! uic) + panic("Unable to initialize a secondary UIC %pOF\n", + np); + + cascade_virq = irq_of_parse_and_map(np, 0); + + irq_set_handler_data(cascade_virq, uic); + irq_set_chained_handler(cascade_virq, uic_irq_cascade); + + /* FIXME: setup critical cascade?? */ + } + } +} + +/* Return an interrupt vector or 0 if no interrupt is pending. */ +unsigned int uic_get_irq(void) +{ + u32 msr; + int src; + + BUG_ON(! primary_uic); + + msr = mfdcr(primary_uic->dcrbase + UIC_MSR); + src = 32 - ffs(msr); + + return irq_linear_revmap(primary_uic->irqhost, src); +} diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig new file mode 100644 index 0000000000..deecede787 --- /dev/null +++ b/arch/powerpc/platforms/512x/Kconfig @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MPC512x + bool "512x-based boards" + depends on PPC_BOOK3S_32 + select COMMON_CLK + select FSL_SOC + select IPIC + select HAVE_PCI + select FSL_PCI if PCI + select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD + select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD + +config MPC512x_LPBFIFO + tristate "MPC512x LocalPlus Bus FIFO driver" + depends on PPC_MPC512x && MPC512X_DMA + help + Enable support for Freescale MPC512x LocalPlus Bus FIFO (SCLPC). + +config MPC5121_ADS + bool "Freescale MPC5121E ADS" + depends on PPC_MPC512x + select DEFAULT_UIMAGE + help + This option enables support for the MPC5121E ADS board. + +config MPC512x_GENERIC + bool "Generic support for simple MPC512x based boards" + depends on PPC_MPC512x + select DEFAULT_UIMAGE + help + This option enables support for simple MPC512x based boards + which do not need custom platform specific setup. + + Compatible boards include: Protonic LVT base boards (ZANMCU + and VICVT2), Freescale MPC5125 Tower system. + +config PDM360NG + bool "ifm PDM360NG board" + depends on PPC_MPC512x + select DEFAULT_UIMAGE + help + This option enables support for the PDM360NG board. diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile new file mode 100644 index 0000000000..2daf22ee26 --- /dev/null +++ b/arch/powerpc/platforms/512x/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Freescale PowerPC 512x linux kernel. +# +obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o +obj-y += mpc512x_shared.o +obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o +obj-$(CONFIG_MPC512x_GENERIC) += mpc512x_generic.o +obj-$(CONFIG_MPC512x_LPBFIFO) += mpc512x_lpbfifo.o +obj-$(CONFIG_PDM360NG) += pdm360ng.o diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c new file mode 100644 index 0000000000..079cb3627e --- /dev/null +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -0,0 +1,1224 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2013 DENX Software Engineering + * + * Gerhard Sittig, + * + * common clock driver support for the MPC512x platform + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc512x.h" /* our public mpc5121_clk_init() API */ + +/* helpers to keep the MCLK intermediates "somewhere" in our table */ +enum { + MCLK_IDX_MUX0, + MCLK_IDX_EN0, + MCLK_IDX_DIV0, + MCLK_MAX_IDX, +}; + +#define NR_PSCS 12 +#define NR_MSCANS 4 +#define NR_SPDIFS 1 +#define NR_OUTCLK 4 +#define NR_MCLKS (NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK) + +/* extend the public set of clocks by adding internal slots for management */ +enum { + /* arrange for adjacent numbers after the public set */ + MPC512x_CLK_START_PRIVATE = MPC512x_CLK_LAST_PUBLIC, + /* clocks which aren't announced to the public */ + MPC512x_CLK_DDR, + MPC512x_CLK_MEM, + MPC512x_CLK_IIM, + /* intermediates in div+gate combos or fractional dividers */ + MPC512x_CLK_DDR_UG, + MPC512x_CLK_SDHC_x4, + MPC512x_CLK_SDHC_UG, + MPC512x_CLK_SDHC2_UG, + MPC512x_CLK_DIU_x4, + MPC512x_CLK_DIU_UG, + MPC512x_CLK_MBX_BUS_UG, + MPC512x_CLK_MBX_UG, + MPC512x_CLK_MBX_3D_UG, + MPC512x_CLK_PCI_UG, + MPC512x_CLK_NFC_UG, + MPC512x_CLK_LPC_UG, + MPC512x_CLK_SPDIF_TX_IN, + /* intermediates for the mux+gate+div+mux MCLK generation */ + MPC512x_CLK_MCLKS_FIRST, + MPC512x_CLK_MCLKS_LAST = MPC512x_CLK_MCLKS_FIRST + + NR_MCLKS * MCLK_MAX_IDX, + /* internal, symbolic spec for the number of slots */ + MPC512x_CLK_LAST_PRIVATE, +}; + +/* data required for the OF clock provider registration */ +static struct clk *clks[MPC512x_CLK_LAST_PRIVATE]; +static struct clk_onecell_data clk_data; + +/* CCM register access */ +static struct mpc512x_ccm __iomem *clkregs; +static DEFINE_SPINLOCK(clklock); + +/* SoC variants {{{ */ + +/* + * tell SoC variants apart as they are rather similar yet not identical, + * cache the result in an enum to not repeatedly run the expensive OF test + * + * MPC5123 is an MPC5121 without the MBX graphics accelerator + * + * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF, + * no PATA, no SATA, no PCI, two FECs (of different compatibility name), + * only 10 PSCs (of different compatibility name), two SDHCs, different + * NFC IP block, output clocks, system PLL status query, different CPMF + * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet + * those differences can get folded into this clock provider support + * code and don't warrant a separate highly redundant implementation + */ + +static enum soc_type { + MPC512x_SOC_MPC5121, + MPC512x_SOC_MPC5123, + MPC512x_SOC_MPC5125, +} soc; + +static void __init mpc512x_clk_determine_soc(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) { + soc = MPC512x_SOC_MPC5121; + return; + } + if (of_machine_is_compatible("fsl,mpc5123")) { + soc = MPC512x_SOC_MPC5123; + return; + } + if (of_machine_is_compatible("fsl,mpc5125")) { + soc = MPC512x_SOC_MPC5125; + return; + } +} + +static bool __init soc_has_mbx(void) +{ + if (soc == MPC512x_SOC_MPC5121) + return true; + return false; +} + +static bool __init soc_has_axe(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool __init soc_has_viu(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool __init soc_has_spdif(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool __init soc_has_pata(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool __init soc_has_sata(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool __init soc_has_pci(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return false; + return true; +} + +static bool __init soc_has_fec2(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static int __init soc_max_pscnum(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return 10; + return 12; +} + +static bool __init soc_has_sdhc2(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool __init soc_has_nfc_5125(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool __init soc_has_outclk(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool __init soc_has_cpmf_0_bypass(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +static bool __init soc_has_mclk_mux0_canin(void) +{ + if (soc == MPC512x_SOC_MPC5125) + return true; + return false; +} + +/* }}} SoC variants */ +/* common clk API wrappers {{{ */ + +/* convenience wrappers around the common clk API */ +static inline struct clk *mpc512x_clk_fixed(const char *name, int rate) +{ + return clk_register_fixed_rate(NULL, name, NULL, 0, rate); +} + +static inline struct clk *mpc512x_clk_factor( + const char *name, const char *parent_name, + int mul, int div) +{ + int clkflags; + + clkflags = CLK_SET_RATE_PARENT; + return clk_register_fixed_factor(NULL, name, parent_name, clkflags, + mul, div); +} + +static inline struct clk *mpc512x_clk_divider( + const char *name, const char *parent_name, u8 clkflags, + u32 __iomem *reg, u8 pos, u8 len, int divflags) +{ + divflags |= CLK_DIVIDER_BIG_ENDIAN; + return clk_register_divider(NULL, name, parent_name, clkflags, + reg, pos, len, divflags, &clklock); +} + +static inline struct clk *mpc512x_clk_divtable( + const char *name, const char *parent_name, + u32 __iomem *reg, u8 pos, u8 len, + const struct clk_div_table *divtab) +{ + u8 divflags; + + divflags = CLK_DIVIDER_BIG_ENDIAN; + return clk_register_divider_table(NULL, name, parent_name, 0, + reg, pos, len, divflags, + divtab, &clklock); +} + +static inline struct clk *mpc512x_clk_gated( + const char *name, const char *parent_name, + u32 __iomem *reg, u8 pos) +{ + int clkflags; + u8 gateflags; + + clkflags = CLK_SET_RATE_PARENT; + gateflags = CLK_GATE_BIG_ENDIAN; + return clk_register_gate(NULL, name, parent_name, clkflags, + reg, pos, gateflags, &clklock); +} + +static inline struct clk *mpc512x_clk_muxed(const char *name, + const char **parent_names, int parent_count, + u32 __iomem *reg, u8 pos, u8 len) +{ + int clkflags; + u8 muxflags; + + clkflags = CLK_SET_RATE_PARENT; + muxflags = CLK_MUX_BIG_ENDIAN; + return clk_register_mux(NULL, name, + parent_names, parent_count, clkflags, + reg, pos, len, muxflags, &clklock); +} + +/* }}} common clk API wrappers */ + +/* helper to isolate a bit field from a register */ +static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len) +{ + uint32_t val; + + val = in_be32(reg); + val >>= pos; + val &= (1 << len) - 1; + return val; +} + +/* get the SPMF and translate it into the "sys pll" multiplier */ +static int __init get_spmf_mult(void) +{ + static int spmf_to_mult[] = { + 68, 1, 12, 16, 20, 24, 28, 32, + 36, 40, 44, 48, 52, 56, 60, 64, + }; + int spmf; + + spmf = get_bit_field(&clkregs->spmr, 24, 4); + return spmf_to_mult[spmf]; +} + +/* + * get the SYS_DIV value and translate it into a divide factor + * + * values returned from here are a multiple of the real factor since the + * divide ratio is fractional + */ +static int __init get_sys_div_x2(void) +{ + static int sysdiv_code_to_x2[] = { + 4, 5, 6, 7, 8, 9, 10, 14, + 12, 16, 18, 22, 20, 24, 26, 30, + 28, 32, 34, 38, 36, 40, 42, 46, + 44, 48, 50, 54, 52, 56, 58, 62, + 60, 64, 66, + }; + int divcode; + + divcode = get_bit_field(&clkregs->scfr2, 26, 6); + return sysdiv_code_to_x2[divcode]; +} + +/* + * get the CPMF value and translate it into a multiplier factor + * + * values returned from here are a multiple of the real factor since the + * multiplier ratio is fractional + */ +static int __init get_cpmf_mult_x2(void) +{ + static int cpmf_to_mult_x36[] = { + /* 0b000 is "times 36" */ + 72, 2, 2, 3, 4, 5, 6, 7, + }; + static int cpmf_to_mult_0by[] = { + /* 0b000 is "bypass" */ + 2, 2, 2, 3, 4, 5, 6, 7, + }; + + int *cpmf_to_mult; + int cpmf; + + cpmf = get_bit_field(&clkregs->spmr, 16, 4); + if (soc_has_cpmf_0_bypass()) + cpmf_to_mult = cpmf_to_mult_0by; + else + cpmf_to_mult = cpmf_to_mult_x36; + return cpmf_to_mult[cpmf]; +} + +/* + * some of the clock dividers do scale in a linear way, yet not all of + * their bit combinations are legal; use a divider table to get a + * resulting set of applicable divider values + */ + +/* applies to the IPS_DIV, and PCI_DIV values */ +static const struct clk_div_table divtab_2346[] = { + { .val = 2, .div = 2, }, + { .val = 3, .div = 3, }, + { .val = 4, .div = 4, }, + { .val = 6, .div = 6, }, + { .div = 0, }, +}; + +/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */ +static const struct clk_div_table divtab_1234[] = { + { .val = 1, .div = 1, }, + { .val = 2, .div = 2, }, + { .val = 3, .div = 3, }, + { .val = 4, .div = 4, }, + { .div = 0, }, +}; + +static int __init get_freq_from_dt(char *propname) +{ + struct device_node *np; + const unsigned int *prop; + int val; + + val = 0; + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr"); + if (np) { + prop = of_get_property(np, propname, NULL); + if (prop) + val = *prop; + of_node_put(np); + } + return val; +} + +static void __init mpc512x_clk_preset_data(void) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(clks); i++) + clks[i] = ERR_PTR(-ENODEV); +} + +/* + * - receives the "bus frequency" from the caller (that's the IPS clock + * rate, the historical source of clock information) + * - fetches the system PLL multiplier and divider values as well as the + * IPS divider value from hardware + * - determines the REF clock rate either from the XTAL/OSC spec (if + * there is a device tree node describing the oscillator) or from the + * IPS bus clock (supported for backwards compatibility, such that + * setups without XTAL/OSC specs keep working) + * - creates the "ref" clock item in the clock tree, such that + * subsequent code can create the remainder of the hierarchy (REF -> + * SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div + * values + */ +static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, + int *sys_mul, int *sys_div, + int *ips_div) +{ + struct clk *osc_clk; + int calc_freq; + + /* fetch mul/div factors from the hardware */ + *sys_mul = get_spmf_mult(); + *sys_mul *= 2; /* compensate for the fractional divider */ + *sys_div = get_sys_div_x2(); + *ips_div = get_bit_field(&clkregs->scfr1, 23, 3); + + /* lookup the oscillator clock for its rate */ + osc_clk = of_clk_get_by_name(np, "osc"); + + /* + * either descend from OSC to REF (and in bypassing verify the + * IPS rate), or backtrack from IPS and multiplier values that + * were fetched from hardware to REF and thus to the OSC value + * + * in either case the REF clock gets created here and the + * remainder of the clock tree can get spanned from there + */ + if (!IS_ERR(osc_clk)) { + clks[MPC512x_CLK_REF] = mpc512x_clk_factor("ref", "osc", 1, 1); + calc_freq = clk_get_rate(clks[MPC512x_CLK_REF]); + calc_freq *= *sys_mul; + calc_freq /= *sys_div; + calc_freq /= 2; + calc_freq /= *ips_div; + if (bus_freq && calc_freq != bus_freq) + pr_warn("calc rate %d != OF spec %d\n", + calc_freq, bus_freq); + } else { + calc_freq = bus_freq; /* start with IPS */ + calc_freq *= *ips_div; /* IPS -> CSB */ + calc_freq *= 2; /* CSB -> SYS */ + calc_freq *= *sys_div; /* SYS -> PLL out */ + calc_freq /= *sys_mul; /* PLL out -> REF == OSC */ + clks[MPC512x_CLK_REF] = mpc512x_clk_fixed("ref", calc_freq); + } +} + +/* MCLK helpers {{{ */ + +/* + * helper code for the MCLK subtree setup + * + * the overview in section 5.2.4 of the MPC5121e Reference Manual rev4 + * suggests that all instances of the "PSC clock generation" are equal, + * and that one might re-use the PSC setup for MSCAN clock generation + * (section 5.2.5) as well, at least the logic if not the data for + * description + * + * the details (starting at page 5-20) show differences in the specific + * inputs of the first mux stage ("can clk in", "spdif tx"), and the + * factual non-availability of the second mux stage (it's present yet + * only one input is valid) + * + * the MSCAN clock related registers (starting at page 5-35) all + * reference "spdif clk" at the first mux stage and don't mention any + * "can clk" at all, which somehow is unexpected + * + * TODO re-check the document, and clarify whether the RM is correct in + * the overview or in the details, and whether the difference is a + * clipboard induced error or results from chip revisions + * + * it turns out that the RM rev4 as of 2012-06 talks about "can" for the + * PSCs while RM rev3 as of 2008-10 talks about "spdif", so I guess that + * first a doc update is required which better reflects reality in the + * SoC before the implementation should follow while no questions remain + */ + +/* + * note that this declaration raises a checkpatch warning, but + * it's the very data type dictated by , + * "fixing" this warning will break compilation + */ +static const char *parent_names_mux0_spdif[] = { + "sys", "ref", "psc-mclk-in", "spdif-tx", +}; + +static const char *parent_names_mux0_canin[] = { + "sys", "ref", "psc-mclk-in", "can-clk-in", +}; + +enum mclk_type { + MCLK_TYPE_PSC, + MCLK_TYPE_MSCAN, + MCLK_TYPE_SPDIF, + MCLK_TYPE_OUTCLK, +}; + +struct mclk_setup_data { + enum mclk_type type; + bool has_mclk1; + const char *name_mux0; + const char *name_en0; + const char *name_div0; + const char *parent_names_mux1[2]; + const char *name_mclk; +}; + +#define MCLK_SETUP_DATA_PSC(id) { \ + MCLK_TYPE_PSC, 0, \ + "psc" #id "-mux0", \ + "psc" #id "-en0", \ + "psc" #id "_mclk_div", \ + { "psc" #id "_mclk_div", "dummy", }, \ + "psc" #id "_mclk", \ +} + +#define MCLK_SETUP_DATA_MSCAN(id) { \ + MCLK_TYPE_MSCAN, 0, \ + "mscan" #id "-mux0", \ + "mscan" #id "-en0", \ + "mscan" #id "_mclk_div", \ + { "mscan" #id "_mclk_div", "dummy", }, \ + "mscan" #id "_mclk", \ +} + +#define MCLK_SETUP_DATA_SPDIF { \ + MCLK_TYPE_SPDIF, 1, \ + "spdif-mux0", \ + "spdif-en0", \ + "spdif_mclk_div", \ + { "spdif_mclk_div", "spdif-rx", }, \ + "spdif_mclk", \ +} + +#define MCLK_SETUP_DATA_OUTCLK(id) { \ + MCLK_TYPE_OUTCLK, 0, \ + "out" #id "-mux0", \ + "out" #id "-en0", \ + "out" #id "_mclk_div", \ + { "out" #id "_mclk_div", "dummy", }, \ + "out" #id "_clk", \ +} + +static struct mclk_setup_data mclk_psc_data[] = { + MCLK_SETUP_DATA_PSC(0), + MCLK_SETUP_DATA_PSC(1), + MCLK_SETUP_DATA_PSC(2), + MCLK_SETUP_DATA_PSC(3), + MCLK_SETUP_DATA_PSC(4), + MCLK_SETUP_DATA_PSC(5), + MCLK_SETUP_DATA_PSC(6), + MCLK_SETUP_DATA_PSC(7), + MCLK_SETUP_DATA_PSC(8), + MCLK_SETUP_DATA_PSC(9), + MCLK_SETUP_DATA_PSC(10), + MCLK_SETUP_DATA_PSC(11), +}; + +static struct mclk_setup_data mclk_mscan_data[] = { + MCLK_SETUP_DATA_MSCAN(0), + MCLK_SETUP_DATA_MSCAN(1), + MCLK_SETUP_DATA_MSCAN(2), + MCLK_SETUP_DATA_MSCAN(3), +}; + +static struct mclk_setup_data mclk_spdif_data[] = { + MCLK_SETUP_DATA_SPDIF, +}; + +static struct mclk_setup_data mclk_outclk_data[] = { + MCLK_SETUP_DATA_OUTCLK(0), + MCLK_SETUP_DATA_OUTCLK(1), + MCLK_SETUP_DATA_OUTCLK(2), + MCLK_SETUP_DATA_OUTCLK(3), +}; + +/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */ +static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) +{ + size_t clks_idx_pub, clks_idx_int; + u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */ + int div; + + /* derive a few parameters from the component type and index */ + switch (entry->type) { + case MCLK_TYPE_PSC: + clks_idx_pub = MPC512x_CLK_PSC0_MCLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (idx) * MCLK_MAX_IDX; + mccr_reg = &clkregs->psc_ccr[idx]; + break; + case MCLK_TYPE_MSCAN: + clks_idx_pub = MPC512x_CLK_MSCAN0_MCLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + idx) * MCLK_MAX_IDX; + mccr_reg = &clkregs->mscan_ccr[idx]; + break; + case MCLK_TYPE_SPDIF: + clks_idx_pub = MPC512x_CLK_SPDIF_MCLK; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX; + mccr_reg = &clkregs->spccr; + break; + case MCLK_TYPE_OUTCLK: + clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx; + clks_idx_int = MPC512x_CLK_MCLKS_FIRST + + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx) + * MCLK_MAX_IDX; + mccr_reg = &clkregs->out_ccr[idx]; + break; + default: + return; + } + + /* + * this was grabbed from the PPC_CLOCK implementation, which + * enforced a specific MCLK divider while the clock was gated + * during setup (that's a documented hardware requirement) + * + * the PPC_CLOCK implementation might even have violated the + * "MCLK <= IPS" constraint, the fixed divider value of 1 + * results in a divider of 2 and thus MCLK = SYS/2 which equals + * CSB which is greater than IPS; the serial port setup may have + * adjusted the divider which the clock setup might have left in + * an undesirable state + * + * initial setup is: + * - MCLK 0 from SYS + * - MCLK DIV such to not exceed the IPS clock + * - MCLK 0 enabled + * - MCLK 1 from MCLK DIV + */ + div = clk_get_rate(clks[MPC512x_CLK_SYS]); + div /= clk_get_rate(clks[MPC512x_CLK_IPS]); + out_be32(mccr_reg, (0 << 16)); + out_be32(mccr_reg, (0 << 16) | ((div - 1) << 17)); + out_be32(mccr_reg, (1 << 16) | ((div - 1) << 17)); + + /* + * create the 'struct clk' items of the MCLK's clock subtree + * + * note that by design we always create all nodes and won't take + * shortcuts here, because + * - the "internal" MCLK_DIV and MCLK_OUT signal in turn are + * selectable inputs to the CFM while those who "actually use" + * the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK + * for their bitrate + * - in the absence of "aliases" for clocks we need to create + * individual 'struct clk' items for whatever might get + * referenced or looked up, even if several of those items are + * identical from the logical POV (their rate value) + * - for easier future maintenance and for better reflection of + * the SoC's documentation, it appears appropriate to generate + * clock items even for those muxers which actually are NOPs + * (those with two inputs of which one is reserved) + */ + clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed( + entry->name_mux0, + soc_has_mclk_mux0_canin() + ? &parent_names_mux0_canin[0] + : &parent_names_mux0_spdif[0], + ARRAY_SIZE(parent_names_mux0_spdif), + mccr_reg, 14, 2); + clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated( + entry->name_en0, entry->name_mux0, + mccr_reg, 16); + clks[clks_idx_int + MCLK_IDX_DIV0] = mpc512x_clk_divider( + entry->name_div0, + entry->name_en0, CLK_SET_RATE_GATE, + mccr_reg, 17, 15, 0); + if (entry->has_mclk1) { + clks[clks_idx_pub] = mpc512x_clk_muxed( + entry->name_mclk, + &entry->parent_names_mux1[0], + ARRAY_SIZE(entry->parent_names_mux1), + mccr_reg, 7, 1); + } else { + clks[clks_idx_pub] = mpc512x_clk_factor( + entry->name_mclk, + entry->parent_names_mux1[0], + 1, 1); + } +} + +/* }}} MCLK helpers */ + +static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) +{ + int sys_mul, sys_div, ips_div; + int mul, div; + size_t mclk_idx; + int freq; + + /* + * developer's notes: + * - consider whether to handle clocks which have both gates and + * dividers via intermediates or by means of composites + * - fractional dividers appear to not map well to composites + * since they can be seen as a fixed multiplier and an + * adjustable divider, while composites can only combine at + * most one of a mux, div, and gate each into one 'struct clk' + * item + * - PSC/MSCAN/SPDIF clock generation OTOH already is very + * specific and cannot get mapped to composites (at least not + * a single one, maybe two of them, but then some of these + * intermediate clock signals get referenced elsewhere (e.g. + * in the clock frequency measurement, CFM) and thus need + * publicly available names + * - the current source layout appropriately reflects the + * hardware setup, and it works, so it's questionable whether + * further changes will result in big enough a benefit + */ + + /* regardless of whether XTAL/OSC exists, have REF created */ + mpc512x_clk_setup_ref_clock(np, busfreq, &sys_mul, &sys_div, &ips_div); + + /* now setup the REF -> SYS -> CSB -> IPS hierarchy */ + clks[MPC512x_CLK_SYS] = mpc512x_clk_factor("sys", "ref", + sys_mul, sys_div); + clks[MPC512x_CLK_CSB] = mpc512x_clk_factor("csb", "sys", 1, 2); + clks[MPC512x_CLK_IPS] = mpc512x_clk_divtable("ips", "csb", + &clkregs->scfr1, 23, 3, + divtab_2346); + /* now setup anything below SYS and CSB and IPS */ + + clks[MPC512x_CLK_DDR_UG] = mpc512x_clk_factor("ddr-ug", "sys", 1, 2); + + /* + * the Reference Manual discusses that for SDHC only even divide + * ratios are supported because clock domain synchronization + * between 'per' and 'ipg' is broken; + * keep the divider's bit 0 cleared (per reset value), and only + * allow to setup the divider's bits 7:1, which results in that + * only even divide ratios can get configured upon rate changes; + * keep the "x4" name because this bit shift hack is an internal + * implementation detail, the "fractional divider with quarters" + * semantics remains + */ + clks[MPC512x_CLK_SDHC_x4] = mpc512x_clk_factor("sdhc-x4", "csb", 2, 1); + clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0, + &clkregs->scfr2, 1, 7, + CLK_DIVIDER_ONE_BASED); + if (soc_has_sdhc2()) { + clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider( + "sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2, + 9, 7, CLK_DIVIDER_ONE_BASED); + } + + clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1); + clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0, + &clkregs->scfr1, 0, 8, + CLK_DIVIDER_ONE_BASED); + + /* + * the "power architecture PLL" was setup from data which was + * sampled from the reset config word, at this point in time the + * configuration can be considered fixed and read only (i.e. no + * longer adjustable, or no longer in need of adjustment), which + * is why we don't register a PLL here but assume fixed factors + */ + mul = get_cpmf_mult_x2(); + div = 2; /* compensate for the fractional factor */ + clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div); + + if (soc_has_mbx()) { + clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor( + "mbx-bus-ug", "csb", 1, 2); + clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable( + "mbx-ug", "mbx-bus-ug", &clkregs->scfr1, + 14, 3, divtab_1234); + clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor( + "mbx-3d-ug", "mbx-ug", 1, 1); + } + if (soc_has_pci()) { + clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable( + "pci-ug", "csb", &clkregs->scfr1, + 20, 3, divtab_2346); + } + if (soc_has_nfc_5125()) { + /* + * XXX TODO implement 5125 NFC clock setup logic, + * with high/low period counters in clkregs->scfr3, + * currently there are no users so it's ENOIMPL + */ + clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP); + } else { + clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable( + "nfc-ug", "ips", &clkregs->scfr1, + 8, 3, divtab_1234); + } + clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips", + &clkregs->scfr1, 11, 3, + divtab_1234); + + clks[MPC512x_CLK_LPC] = mpc512x_clk_gated("lpc", "lpc-ug", + &clkregs->sccr1, 30); + clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug", + &clkregs->sccr1, 29); + if (soc_has_pata()) { + clks[MPC512x_CLK_PATA] = mpc512x_clk_gated( + "pata", "ips", &clkregs->sccr1, 28); + } + /* for PSCs there is a "registers" gate and a bitrate MCLK subtree */ + for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) { + char name[12]; + snprintf(name, sizeof(name), "psc%d", mclk_idx); + clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated( + name, "ips", &clkregs->sccr1, 27 - mclk_idx); + mpc512x_clk_setup_mclk(&mclk_psc_data[mclk_idx], mclk_idx); + } + clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips", + &clkregs->sccr1, 15); + if (soc_has_sata()) { + clks[MPC512x_CLK_SATA] = mpc512x_clk_gated( + "sata", "ips", &clkregs->sccr1, 14); + } + clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips", + &clkregs->sccr1, 13); + if (soc_has_pci()) { + clks[MPC512x_CLK_PCI] = mpc512x_clk_gated( + "pci", "pci-ug", &clkregs->sccr1, 11); + } + clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug", + &clkregs->sccr1, 10); + if (soc_has_fec2()) { + clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated( + "fec2", "ips", &clkregs->sccr1, 9); + } + + clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug", + &clkregs->sccr2, 31); + if (soc_has_axe()) { + clks[MPC512x_CLK_AXE] = mpc512x_clk_gated( + "axe", "csb", &clkregs->sccr2, 30); + } + clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips", + &clkregs->sccr2, 29); + clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb", + &clkregs->sccr2, 28); + clks[MPC512x_CLK_USB2] = mpc512x_clk_gated("usb2", "csb", + &clkregs->sccr2, 27); + clks[MPC512x_CLK_I2C] = mpc512x_clk_gated("i2c", "ips", + &clkregs->sccr2, 26); + /* MSCAN differs from PSC with just one gate for multiple components */ + clks[MPC512x_CLK_BDLC] = mpc512x_clk_gated("bdlc", "ips", + &clkregs->sccr2, 25); + for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_mscan_data); mclk_idx++) + mpc512x_clk_setup_mclk(&mclk_mscan_data[mclk_idx], mclk_idx); + clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug", + &clkregs->sccr2, 24); + /* there is only one SPDIF component, which shares MCLK support code */ + if (soc_has_spdif()) { + clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated( + "spdif", "ips", &clkregs->sccr2, 23); + mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0); + } + if (soc_has_mbx()) { + clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated( + "mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22); + clks[MPC512x_CLK_MBX] = mpc512x_clk_gated( + "mbx", "mbx-ug", &clkregs->sccr2, 21); + clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated( + "mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20); + } + clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb", + &clkregs->sccr2, 19); + if (soc_has_viu()) { + clks[MPC512x_CLK_VIU] = mpc512x_clk_gated( + "viu", "csb", &clkregs->sccr2, 18); + } + if (soc_has_sdhc2()) { + clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated( + "sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17); + } + + if (soc_has_outclk()) { + size_t idx; /* used as mclk_idx, just to trim line length */ + for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++) + mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx); + } + + /* + * externally provided clocks (when implemented in hardware, + * device tree may specify values which otherwise were unknown) + */ + freq = get_freq_from_dt("psc_mclk_in"); + if (!freq) + freq = 25000000; + clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq); + if (soc_has_mclk_mux0_canin()) { + freq = get_freq_from_dt("can_clk_in"); + clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed( + "can_clk_in", freq); + } else { + freq = get_freq_from_dt("spdif_tx_in"); + clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed( + "spdif_tx_in", freq); + freq = get_freq_from_dt("spdif_rx_in"); + clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed( + "spdif_rx_in", freq); + } + + /* fixed frequency for AC97, always 24.567MHz */ + clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000); + + /* + * pre-enable those "internal" clock items which never get + * claimed by any peripheral driver, to not have the clock + * subsystem disable them late at startup + */ + clk_prepare_enable(clks[MPC512x_CLK_DUMMY]); + clk_prepare_enable(clks[MPC512x_CLK_E300]); /* PowerPC CPU */ + clk_prepare_enable(clks[MPC512x_CLK_DDR]); /* DRAM */ + clk_prepare_enable(clks[MPC512x_CLK_MEM]); /* SRAM */ + clk_prepare_enable(clks[MPC512x_CLK_IPS]); /* SoC periph */ + clk_prepare_enable(clks[MPC512x_CLK_LPC]); /* boot media */ +} + +/* + * registers the set of public clocks (those listed in the dt-bindings/ + * header file) for OF lookups, keeps the intermediates private to us + */ +static void __init mpc5121_clk_register_of_provider(struct device_node *np) +{ + clk_data.clks = clks; + clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */ + of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data); +} + +/* + * temporary support for the period of time between introduction of CCF + * support and the adjustment of peripheral drivers to OF based lookups + */ +static void __init mpc5121_clk_provide_migration_support(void) +{ + struct device_node *np; + /* + * pre-enable those clock items which are not yet appropriately + * acquired by their peripheral driver + * + * the PCI clock cannot get acquired by its peripheral driver, + * because for this platform the driver won't probe(), instead + * initialization is done from within the .setup_arch() routine + * at a point in time where the clock provider has not been + * setup yet and thus isn't available yet + * + * so we "pre-enable" the clock here, to not have the clock + * subsystem automatically disable this item in a late init call + * + * this PCI clock pre-enable workaround only applies when there + * are device tree nodes for PCI and thus the peripheral driver + * has attached to bridges, otherwise the PCI clock remains + * unused and so it gets disabled + */ + clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */ + np = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci"); + of_node_put(np); + if (np) + clk_prepare_enable(clks[MPC512x_CLK_PCI]); +} + +/* + * those macros are not exactly pretty, but they encapsulate a lot + * of copy'n'paste heavy code which is even more ugly, and reduce + * the potential for inconsistencies in those many code copies + */ +#define FOR_NODES(compatname) \ + for_each_compatible_node(np, NULL, compatname) + +#define NODE_PREP do { \ + of_address_to_resource(np, 0, &res); \ + snprintf(devname, sizeof(devname), "%pa.%s", &res.start, np->name); \ +} while (0) + +#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \ + struct clk *clk; \ + clk = of_clk_get_by_name(np, clkname); \ + if (IS_ERR(clk)) { \ + clk = clkitem; \ + clk_register_clkdev(clk, clkname, devname); \ + if (regnode) \ + clk_register_clkdev(clk, clkname, np->name); \ + did_register |= DID_REG_ ## regflag; \ + pr_debug("clock alias name '%s' for dev '%s' pointer %p\n", \ + clkname, devname, clk); \ + } else { \ + clk_put(clk); \ + } \ +} while (0) + +/* + * register source code provided fallback results for clock lookups, + * these get consulted when OF based clock lookup fails (that is in the + * case of not yet adjusted device tree data, where clock related specs + * are missing) + */ +static void __init mpc5121_clk_provide_backwards_compat(void) +{ + enum did_reg_flags { + DID_REG_PSC = BIT(0), + DID_REG_PSCFIFO = BIT(1), + DID_REG_NFC = BIT(2), + DID_REG_CAN = BIT(3), + DID_REG_I2C = BIT(4), + DID_REG_DIU = BIT(5), + DID_REG_VIU = BIT(6), + DID_REG_FEC = BIT(7), + DID_REG_USB = BIT(8), + DID_REG_PATA = BIT(9), + }; + + int did_register; + struct device_node *np; + struct resource res; + int idx; + char devname[32]; + + did_register = 0; + + FOR_NODES(mpc512x_select_psc_compat()) { + NODE_PREP; + idx = (res.start >> 8) & 0xf; + NODE_CHK("ipg", clks[MPC512x_CLK_PSC0 + idx], 0, PSC); + NODE_CHK("mclk", clks[MPC512x_CLK_PSC0_MCLK + idx], 0, PSC); + } + + FOR_NODES("fsl,mpc5121-psc-fifo") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_PSC_FIFO], 1, PSCFIFO); + } + + FOR_NODES("fsl,mpc5121-nfc") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_NFC], 0, NFC); + } + + FOR_NODES("fsl,mpc5121-mscan") { + NODE_PREP; + idx = 0; + idx += (res.start & 0x2000) ? 2 : 0; + idx += (res.start & 0x0080) ? 1 : 0; + NODE_CHK("ipg", clks[MPC512x_CLK_BDLC], 0, CAN); + NODE_CHK("mclk", clks[MPC512x_CLK_MSCAN0_MCLK + idx], 0, CAN); + } + + /* + * do register the 'ips', 'sys', and 'ref' names globally + * instead of inside each individual CAN node, as there is no + * potential for a name conflict (in contrast to 'ipg' and 'mclk') + */ + if (did_register & DID_REG_CAN) { + clk_register_clkdev(clks[MPC512x_CLK_IPS], "ips", NULL); + clk_register_clkdev(clks[MPC512x_CLK_SYS], "sys", NULL); + clk_register_clkdev(clks[MPC512x_CLK_REF], "ref", NULL); + } + + FOR_NODES("fsl,mpc5121-i2c") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_I2C], 0, I2C); + } + + /* + * workaround for the fact that the I2C driver does an "anonymous" + * lookup (NULL name spec, which yields the first clock spec) for + * which we cannot register an alias -- a _global_ 'ipg' alias that + * is not bound to any device name and returns the I2C clock item + * is not a good idea + * + * so we have the lookup in the peripheral driver fail, which is + * silent and non-fatal, and pre-enable the clock item here such + * that register access is possible + * + * see commit b3bfce2b "i2c: mpc: cleanup clock API use" for + * details, adjusting s/NULL/"ipg"/ in i2c-mpc.c would make this + * workaround obsolete + */ + if (did_register & DID_REG_I2C) + clk_prepare_enable(clks[MPC512x_CLK_I2C]); + + FOR_NODES("fsl,mpc5121-diu") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_DIU], 1, DIU); + } + + FOR_NODES("fsl,mpc5121-viu") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_VIU], 0, VIU); + } + + /* + * note that 2771399a "fs_enet: cleanup clock API use" did use the + * "per" string for the clock lookup in contrast to the "ipg" name + * which most other nodes are using -- this is not a fatal thing + * but just something to keep in mind when doing compatibility + * registration, it's a non-issue with up-to-date device tree data + */ + FOR_NODES("fsl,mpc5121-fec") { + NODE_PREP; + NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC); + } + FOR_NODES("fsl,mpc5121-fec-mdio") { + NODE_PREP; + NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC); + } + /* + * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800; + * the clock items don't "form an array" since FEC2 was + * added only later and was not allowed to shift all other + * clock item indices, so the numbers aren't adjacent + */ + FOR_NODES("fsl,mpc5125-fec") { + NODE_PREP; + if (res.start & 0x4000) + idx = MPC512x_CLK_FEC2; + else + idx = MPC512x_CLK_FEC; + NODE_CHK("per", clks[idx], 0, FEC); + } + + FOR_NODES("fsl,mpc5121-usb2-dr") { + NODE_PREP; + idx = (res.start & 0x4000) ? 1 : 0; + NODE_CHK("ipg", clks[MPC512x_CLK_USB1 + idx], 0, USB); + } + + FOR_NODES("fsl,mpc5121-pata") { + NODE_PREP; + NODE_CHK("ipg", clks[MPC512x_CLK_PATA], 0, PATA); + } + + /* + * try to collapse diagnostics into a single line of output yet + * provide a full list of what is missing, to avoid noise in the + * absence of up-to-date device tree data -- backwards + * compatibility to old DTBs is a requirement, updates may be + * desirable or preferrable but are not at all mandatory + */ + if (did_register) { + pr_notice("device tree lacks clock specs, adding fallbacks (0x%x,%s%s%s%s%s%s%s%s%s%s)\n", + did_register, + (did_register & DID_REG_PSC) ? " PSC" : "", + (did_register & DID_REG_PSCFIFO) ? " PSCFIFO" : "", + (did_register & DID_REG_NFC) ? " NFC" : "", + (did_register & DID_REG_CAN) ? " CAN" : "", + (did_register & DID_REG_I2C) ? " I2C" : "", + (did_register & DID_REG_DIU) ? " DIU" : "", + (did_register & DID_REG_VIU) ? " VIU" : "", + (did_register & DID_REG_FEC) ? " FEC" : "", + (did_register & DID_REG_USB) ? " USB" : "", + (did_register & DID_REG_PATA) ? " PATA" : ""); + } else { + pr_debug("device tree has clock specs, no fallbacks added\n"); + } +} + +/* + * The "fixed-clock" nodes (which includes the oscillator node if the board's + * DT provides one) has already been scanned by the of_clk_init() in + * time_init(). + */ +int __init mpc5121_clk_init(void) +{ + struct device_node *clk_np; + int busfreq; + + /* map the clock control registers */ + clk_np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock"); + if (!clk_np) + return -ENODEV; + clkregs = of_iomap(clk_np, 0); + WARN_ON(!clkregs); + + /* determine the SoC variant we run on */ + mpc512x_clk_determine_soc(); + + /* invalidate all not yet registered clock slots */ + mpc512x_clk_preset_data(); + + /* + * add a dummy clock for those situations where a clock spec is + * required yet no real clock is involved + */ + clks[MPC512x_CLK_DUMMY] = mpc512x_clk_fixed("dummy", 0); + + /* + * have all the real nodes in the clock tree populated from REF + * down to all leaves, either starting from the OSC node or from + * a REF root that was created from the IPS bus clock input + */ + busfreq = get_freq_from_dt("bus-frequency"); + mpc512x_clk_setup_clock_tree(clk_np, busfreq); + + /* register as an OF clock provider */ + mpc5121_clk_register_of_provider(clk_np); + + of_node_put(clk_np); + + /* + * unbreak not yet adjusted peripheral drivers during migration + * towards fully operational common clock support, and allow + * operation in the absence of clock related device tree specs + */ + mpc5121_clk_provide_migration_support(); + mpc5121_clk_provide_backwards_compat(); + + return 0; +} diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c new file mode 100644 index 0000000000..a18f85b3ef --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby, , Thur Mar 29 2007 + * + * Description: + * MPC5121 ADS board setup + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#include "mpc512x.h" +#include "mpc5121_ads.h" + +static void __init mpc5121_ads_setup_arch(void) +{ + printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n"); + /* + * cpld regs are needed early + */ + mpc5121_ads_cpld_map(); + + mpc512x_setup_arch(); +} + +static void __init mpc5121_ads_setup_pci(void) +{ +#ifdef CONFIG_PCI + struct device_node *np; + + for_each_compatible_node(np, "pci", "fsl,mpc5121-pci") + mpc83xx_add_bridge(np); +#endif +} + +static void __init mpc5121_ads_init_IRQ(void) +{ + mpc512x_init_IRQ(); + mpc5121_ads_cpld_pic_init(); +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc5121_ads_probe(void) +{ + mpc512x_init_early(); + + return 1; +} + +define_machine(mpc5121_ads) { + .name = "MPC5121 ADS", + .compatible = "fsl,mpc5121ads", + .probe = mpc5121_ads_probe, + .setup_arch = mpc5121_ads_setup_arch, + .discover_phbs = mpc5121_ads_setup_pci, + .init = mpc512x_init, + .init_IRQ = mpc5121_ads_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc512x_restart, +}; diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.h b/arch/powerpc/platforms/512x/mpc5121_ads.h new file mode 100644 index 0000000000..c88dea828c --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc5121_ads.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Prototypes for ADS5121 specific code + */ + +#ifndef __MPC512ADS_H__ +#define __MPC512ADS_H__ +extern void __init mpc5121_ads_cpld_map(void); +extern void __init mpc5121_ads_cpld_pic_init(void); +#endif /* __MPC512ADS_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c new file mode 100644 index 0000000000..6f08d07aee --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby, + * + * Description: + * MPC5121ADS CPLD irq handling + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include + +static struct device_node *cpld_pic_node; +static struct irq_domain *cpld_pic_host; + +/* + * Bits to ignore in the misc_status register + * 0x10 touch screen pendown is hard routed to irq1 + * 0x02 pci status is read from pci status register + */ +#define MISC_IGNORE 0x12 + +/* + * Nothing to ignore in pci status register + */ +#define PCI_IGNORE 0x00 + +struct cpld_pic { + u8 pci_mask; + u8 pci_status; + u8 route; + u8 misc_mask; + u8 misc_status; + u8 misc_control; +}; + +static struct cpld_pic __iomem *cpld_regs; + +static void __iomem * +irq_to_pic_mask(unsigned int irq) +{ + return irq <= 7 ? &cpld_regs->pci_mask : &cpld_regs->misc_mask; +} + +static unsigned int +irq_to_pic_bit(unsigned int irq) +{ + return 1 << (irq & 0x7); +} + +static void +cpld_mask_irq(struct irq_data *d) +{ + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); + void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); + + out_8(pic_mask, + in_8(pic_mask) | irq_to_pic_bit(cpld_irq)); +} + +static void +cpld_unmask_irq(struct irq_data *d) +{ + unsigned int cpld_irq = (unsigned int)irqd_to_hwirq(d); + void __iomem *pic_mask = irq_to_pic_mask(cpld_irq); + + out_8(pic_mask, + in_8(pic_mask) & ~irq_to_pic_bit(cpld_irq)); +} + +static struct irq_chip cpld_pic = { + .name = "CPLD PIC", + .irq_mask = cpld_mask_irq, + .irq_ack = cpld_mask_irq, + .irq_unmask = cpld_unmask_irq, +}; + +static unsigned int +cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, + u8 __iomem *maskp) +{ + u8 status = in_8(statusp); + u8 mask = in_8(maskp); + + /* ignore don't cares and masked irqs */ + status |= (ignore | mask); + + if (status == 0xff) + return ~0; + + return ffz(status) + offset; +} + +static void cpld_pic_cascade(struct irq_desc *desc) +{ + unsigned int hwirq; + + hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, + &cpld_regs->pci_mask); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); + return; + } + + hwirq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, + &cpld_regs->misc_mask); + if (hwirq != ~0) { + generic_handle_domain_irq(cpld_pic_host, hwirq); + return; + } +} + +static int +cpld_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return cpld_pic_node == node; +} + +static int +cpld_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &cpld_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops cpld_pic_host_ops = { + .match = cpld_pic_host_match, + .map = cpld_pic_host_map, +}; + +void __init +mpc5121_ads_cpld_map(void) +{ + struct device_node *np = NULL; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic"); + if (!np) { + printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n"); + return; + } + + cpld_regs = of_iomap(np, 0); + of_node_put(np); +} + +void __init +mpc5121_ads_cpld_pic_init(void) +{ + unsigned int cascade_irq; + struct device_node *np = NULL; + + pr_debug("cpld_ic_init\n"); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic"); + if (!np) { + printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n"); + return; + } + + if (!cpld_regs) + goto end; + + cascade_irq = irq_of_parse_and_map(np, 0); + if (!cascade_irq) + goto end; + + /* + * statically route touch screen pendown through 1 + * and ignore it here + * route all others through our cascade irq + */ + out_8(&cpld_regs->route, 0xfd); + out_8(&cpld_regs->pci_mask, 0xff); + /* unmask pci ints in misc mask */ + out_8(&cpld_regs->misc_mask, ~(MISC_IGNORE)); + + cpld_pic_node = of_node_get(np); + + cpld_pic_host = irq_domain_add_linear(np, 16, &cpld_pic_host_ops, NULL); + if (!cpld_pic_host) { + printk(KERN_ERR "CPLD PIC: failed to allocate irq host!\n"); + goto end; + } + + irq_set_chained_handler(cascade_irq, cpld_pic_cascade); +end: + of_node_put(np); +} diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h new file mode 100644 index 0000000000..d2cb06e3a4 --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * Prototypes for MPC512x shared code + */ + +#ifndef __MPC512X_H__ +#define __MPC512X_H__ +extern void __init mpc512x_init_IRQ(void); +extern void __init mpc512x_init_early(void); +extern void __init mpc512x_init(void); +extern void __init mpc512x_setup_arch(void); +extern int __init mpc5121_clk_init(void); +const char *__init mpc512x_select_psc_compat(void); +extern void __noreturn mpc512x_restart(char *cmd); + +#endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c new file mode 100644 index 0000000000..0d58ab257c --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby, + * + * Description: + * MPC512x SoC setup + */ + +#include +#include + +#include +#include +#include + +#include "mpc512x.h" + +/* + * list of supported boards + */ +static const char * const board[] __initconst = { + "prt,prtlvt", + "fsl,mpc5125ads", + "ifm,ac14xx", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc512x_generic_probe(void) +{ + if (!of_device_compatible_match(of_root, board)) + return 0; + + mpc512x_init_early(); + + return 1; +} + +define_machine(mpc512x_generic) { + .name = "MPC512x generic", + .probe = mpc512x_generic_probe, + .init = mpc512x_init, + .setup_arch = mpc512x_setup_arch, + .init_IRQ = mpc512x_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc512x_restart, +}; diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c new file mode 100644 index 0000000000..4a25b6b486 --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -0,0 +1,518 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The driver for Freescale MPC512x LocalPlus Bus FIFO + * (called SCLPC in the Reference Manual). + * + * Copyright (C) 2013-2015 Alexander Popov . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "mpc512x_lpbfifo" + +struct cs_range { + u32 csnum; + u32 base; /* must be zero */ + u32 addr; + u32 size; +}; + +static struct lpbfifo_data { + spinlock_t lock; /* for protecting lpbfifo_data */ + phys_addr_t regs_phys; + resource_size_t regs_size; + struct mpc512x_lpbfifo __iomem *regs; + int irq; + struct cs_range *cs_ranges; + size_t cs_n; + struct dma_chan *chan; + struct mpc512x_lpbfifo_request *req; + dma_addr_t ram_bus_addr; + bool wait_lpbfifo_irq; + bool wait_lpbfifo_callback; +} lpbfifo; + +/* + * A data transfer from RAM to some device on LPB is finished + * when both mpc512x_lpbfifo_irq() and mpc512x_lpbfifo_callback() + * have been called. We execute the callback registered in + * mpc512x_lpbfifo_request just after that. + * But for a data transfer from some device on LPB to RAM we don't enable + * LPBFIFO interrupt because clearing MPC512X_SCLPC_SUCCESS interrupt flag + * automatically disables LPBFIFO reading request to the DMA controller + * and the data transfer hangs. So the callback registered in + * mpc512x_lpbfifo_request is executed at the end of mpc512x_lpbfifo_callback(). + */ + +/* + * mpc512x_lpbfifo_irq - IRQ handler for LPB FIFO + */ +static irqreturn_t mpc512x_lpbfifo_irq(int irq, void *param) +{ + struct device *dev = (struct device *)param; + struct mpc512x_lpbfifo_request *req = NULL; + unsigned long flags; + u32 status; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + if (!lpbfifo.regs) + goto end; + + req = lpbfifo.req; + if (!req || req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) { + dev_err(dev, "bogus LPBFIFO IRQ\n"); + goto end; + } + + status = in_be32(&lpbfifo.regs->status); + if (status != MPC512X_SCLPC_SUCCESS) { + dev_err(dev, "DMA transfer from RAM to peripheral failed\n"); + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + goto end; + } + /* Clear the interrupt flag */ + out_be32(&lpbfifo.regs->status, MPC512X_SCLPC_SUCCESS); + + lpbfifo.wait_lpbfifo_irq = false; + + if (lpbfifo.wait_lpbfifo_callback) + goto end; + + /* Transfer is finished, set the FIFO as idle */ + lpbfifo.req = NULL; + + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + if (req->callback) + req->callback(req); + + return IRQ_HANDLED; + + end: + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return IRQ_HANDLED; +} + +/* + * mpc512x_lpbfifo_callback is called by DMA driver when + * DMA transaction is finished. + */ +static void mpc512x_lpbfifo_callback(void *param) +{ + unsigned long flags; + struct mpc512x_lpbfifo_request *req = NULL; + enum dma_data_direction dir; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + if (!lpbfifo.regs) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return; + } + + req = lpbfifo.req; + if (!req) { + pr_err("bogus LPBFIFO callback\n"); + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return; + } + + /* Release the mapping */ + if (req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) + dir = DMA_TO_DEVICE; + else + dir = DMA_FROM_DEVICE; + dma_unmap_single(lpbfifo.chan->device->dev, + lpbfifo.ram_bus_addr, req->size, dir); + + lpbfifo.wait_lpbfifo_callback = false; + + if (!lpbfifo.wait_lpbfifo_irq) { + /* Transfer is finished, set the FIFO as idle */ + lpbfifo.req = NULL; + + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + if (req->callback) + req->callback(req); + } else { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + } +} + +static int mpc512x_lpbfifo_kick(void) +{ + u32 bits; + bool no_incr = false; + u32 bpt = 32; /* max bytes per LPBFIFO transaction involving DMA */ + u32 cs = 0; + size_t i; + struct dma_device *dma_dev = NULL; + struct scatterlist sg; + enum dma_data_direction dir; + struct dma_slave_config dma_conf = {}; + struct dma_async_tx_descriptor *dma_tx = NULL; + dma_cookie_t cookie; + int ret; + + /* + * 1. Fit the requirements: + * - the packet size must be a multiple of 4 since FIFO Data Word + * Register allows only full-word access according the Reference + * Manual; + * - the physical address of the device on LPB and the packet size + * must be aligned on BPT (bytes per transaction) or 8-bytes + * boundary according the Reference Manual; + * - but we choose DMA maxburst equal (or very close to) BPT to prevent + * DMA controller from overtaking FIFO and causing FIFO underflow + * error. So we force the packet size to be aligned on BPT boundary + * not to confuse DMA driver which requires the packet size to be + * aligned on maxburst boundary; + * - BPT should be set to the LPB device port size for operation with + * disabled auto-incrementing according Reference Manual. + */ + if (lpbfifo.req->size == 0 || !IS_ALIGNED(lpbfifo.req->size, 4)) + return -EINVAL; + + if (lpbfifo.req->portsize != LPB_DEV_PORTSIZE_UNDEFINED) { + bpt = lpbfifo.req->portsize; + no_incr = true; + } + + while (bpt > 1) { + if (IS_ALIGNED(lpbfifo.req->dev_phys_addr, min(bpt, 0x8u)) && + IS_ALIGNED(lpbfifo.req->size, bpt)) { + break; + } + + if (no_incr) + return -EINVAL; + + bpt >>= 1; + } + dma_conf.dst_maxburst = max(bpt, 0x4u) / 4; + dma_conf.src_maxburst = max(bpt, 0x4u) / 4; + + for (i = 0; i < lpbfifo.cs_n; i++) { + phys_addr_t cs_start = lpbfifo.cs_ranges[i].addr; + phys_addr_t cs_end = cs_start + lpbfifo.cs_ranges[i].size; + phys_addr_t access_start = lpbfifo.req->dev_phys_addr; + phys_addr_t access_end = access_start + lpbfifo.req->size; + + if (access_start >= cs_start && access_end <= cs_end) { + cs = lpbfifo.cs_ranges[i].csnum; + break; + } + } + if (i == lpbfifo.cs_n) + return -EFAULT; + + /* 2. Prepare DMA */ + dma_dev = lpbfifo.chan->device; + + if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) { + dir = DMA_TO_DEVICE; + dma_conf.direction = DMA_MEM_TO_DEV; + dma_conf.dst_addr = lpbfifo.regs_phys + + offsetof(struct mpc512x_lpbfifo, data_word); + } else { + dir = DMA_FROM_DEVICE; + dma_conf.direction = DMA_DEV_TO_MEM; + dma_conf.src_addr = lpbfifo.regs_phys + + offsetof(struct mpc512x_lpbfifo, data_word); + } + dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + + /* Make DMA channel work with LPB FIFO data register */ + if (dma_dev->device_config(lpbfifo.chan, &dma_conf)) { + ret = -EINVAL; + goto err_dma_prep; + } + + sg_init_table(&sg, 1); + + sg_dma_address(&sg) = dma_map_single(dma_dev->dev, + lpbfifo.req->ram_virt_addr, lpbfifo.req->size, dir); + if (dma_mapping_error(dma_dev->dev, sg_dma_address(&sg))) + return -EFAULT; + + lpbfifo.ram_bus_addr = sg_dma_address(&sg); /* For freeing later */ + + sg_dma_len(&sg) = lpbfifo.req->size; + + dma_tx = dmaengine_prep_slave_sg(lpbfifo.chan, &sg, + 1, dma_conf.direction, 0); + if (!dma_tx) { + ret = -ENOSPC; + goto err_dma_prep; + } + dma_tx->callback = mpc512x_lpbfifo_callback; + dma_tx->callback_param = NULL; + + /* 3. Prepare FIFO */ + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + out_be32(&lpbfifo.regs->enable, 0x0); + + /* + * Configure the watermarks for write operation (RAM->DMA->FIFO->dev): + * - high watermark 7 words according the Reference Manual, + * - low watermark 512 bytes (half of the FIFO). + * These watermarks don't work for read operation since the + * MPC512X_SCLPC_FLUSH bit is set (according the Reference Manual). + */ + out_be32(&lpbfifo.regs->fifo_ctrl, MPC512X_SCLPC_FIFO_CTRL(0x7)); + out_be32(&lpbfifo.regs->fifo_alarm, MPC512X_SCLPC_FIFO_ALARM(0x200)); + + /* + * Start address is a physical address of the region which belongs + * to the device on the LocalPlus Bus + */ + out_be32(&lpbfifo.regs->start_addr, lpbfifo.req->dev_phys_addr); + + /* + * Configure chip select, transfer direction, address increment option + * and bytes per transaction option + */ + bits = MPC512X_SCLPC_CS(cs); + if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) + bits |= MPC512X_SCLPC_READ | MPC512X_SCLPC_FLUSH; + if (no_incr) + bits |= MPC512X_SCLPC_DAI; + bits |= MPC512X_SCLPC_BPT(bpt); + out_be32(&lpbfifo.regs->ctrl, bits); + + /* Unmask irqs */ + bits = MPC512X_SCLPC_ENABLE | MPC512X_SCLPC_ABORT_INT_ENABLE; + if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) + bits |= MPC512X_SCLPC_NORM_INT_ENABLE; + else + lpbfifo.wait_lpbfifo_irq = false; + + out_be32(&lpbfifo.regs->enable, bits); + + /* 4. Set packet size and kick FIFO off */ + bits = lpbfifo.req->size | MPC512X_SCLPC_START; + out_be32(&lpbfifo.regs->pkt_size, bits); + + /* 5. Finally kick DMA off */ + cookie = dma_tx->tx_submit(dma_tx); + if (dma_submit_error(cookie)) { + ret = -ENOSPC; + goto err_dma_submit; + } + + return 0; + + err_dma_submit: + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + err_dma_prep: + dma_unmap_single(dma_dev->dev, sg_dma_address(&sg), + lpbfifo.req->size, dir); + return ret; +} + +static int mpc512x_lpbfifo_submit_locked(struct mpc512x_lpbfifo_request *req) +{ + int ret = 0; + + if (!lpbfifo.regs) + return -ENODEV; + + /* Check whether a transfer is in progress */ + if (lpbfifo.req) + return -EBUSY; + + lpbfifo.wait_lpbfifo_irq = true; + lpbfifo.wait_lpbfifo_callback = true; + lpbfifo.req = req; + + ret = mpc512x_lpbfifo_kick(); + if (ret != 0) + lpbfifo.req = NULL; /* Set the FIFO as idle */ + + return ret; +} + +int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&lpbfifo.lock, flags); + ret = mpc512x_lpbfifo_submit_locked(req); + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + return ret; +} +EXPORT_SYMBOL(mpc512x_lpbfifo_submit); + +/* + * LPBFIFO driver uses "ranges" property of "localbus" device tree node + * for being able to determine the chip select number of a client device + * ordering a DMA transfer. + */ +static int get_cs_ranges(struct device *dev) +{ + int ret = -ENODEV; + struct device_node *lb_node; + size_t i = 0; + struct of_range_parser parser; + struct of_range range; + + lb_node = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-localbus"); + if (!lb_node) + return ret; + + of_range_parser_init(&parser, lb_node); + lpbfifo.cs_n = of_range_count(&parser); + + lpbfifo.cs_ranges = devm_kcalloc(dev, lpbfifo.cs_n, + sizeof(struct cs_range), GFP_KERNEL); + if (!lpbfifo.cs_ranges) + goto end; + + for_each_of_range(&parser, &range) { + u32 base = lower_32_bits(range.bus_addr); + if (base) + goto end; + + lpbfifo.cs_ranges[i].csnum = upper_32_bits(range.bus_addr); + lpbfifo.cs_ranges[i].base = base; + lpbfifo.cs_ranges[i].addr = range.cpu_addr; + lpbfifo.cs_ranges[i].size = range.size; + i++; + } + + ret = 0; + + end: + of_node_put(lb_node); + return ret; +} + +static int mpc512x_lpbfifo_probe(struct platform_device *pdev) +{ + struct resource r; + int ret = 0; + + memset(&lpbfifo, 0, sizeof(struct lpbfifo_data)); + spin_lock_init(&lpbfifo.lock); + + lpbfifo.chan = dma_request_chan(&pdev->dev, "rx-tx"); + if (IS_ERR(lpbfifo.chan)) + return PTR_ERR(lpbfifo.chan); + + if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) { + dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n"); + ret = -ENODEV; + goto err0; + } + + lpbfifo.regs_phys = r.start; + lpbfifo.regs_size = resource_size(&r); + + if (!devm_request_mem_region(&pdev->dev, lpbfifo.regs_phys, + lpbfifo.regs_size, DRV_NAME)) { + dev_err(&pdev->dev, "unable to request region\n"); + ret = -EBUSY; + goto err0; + } + + lpbfifo.regs = devm_ioremap(&pdev->dev, + lpbfifo.regs_phys, lpbfifo.regs_size); + if (!lpbfifo.regs) { + dev_err(&pdev->dev, "mapping registers failed\n"); + ret = -ENOMEM; + goto err0; + } + + out_be32(&lpbfifo.regs->enable, + MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + + if (get_cs_ranges(&pdev->dev) != 0) { + dev_err(&pdev->dev, "bad '/localbus' device tree node\n"); + ret = -ENODEV; + goto err0; + } + + lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (!lpbfifo.irq) { + dev_err(&pdev->dev, "mapping irq failed\n"); + ret = -ENODEV; + goto err0; + } + + if (request_irq(lpbfifo.irq, mpc512x_lpbfifo_irq, 0, + DRV_NAME, &pdev->dev) != 0) { + dev_err(&pdev->dev, "requesting irq failed\n"); + ret = -ENODEV; + goto err1; + } + + dev_info(&pdev->dev, "probe succeeded\n"); + return 0; + + err1: + irq_dispose_mapping(lpbfifo.irq); + err0: + dma_release_channel(lpbfifo.chan); + return ret; +} + +static void mpc512x_lpbfifo_remove(struct platform_device *pdev) +{ + unsigned long flags; + struct dma_device *dma_dev = lpbfifo.chan->device; + struct mpc512x_lpbfifo __iomem *regs = NULL; + + spin_lock_irqsave(&lpbfifo.lock, flags); + regs = lpbfifo.regs; + lpbfifo.regs = NULL; + spin_unlock_irqrestore(&lpbfifo.lock, flags); + + dma_dev->device_terminate_all(lpbfifo.chan); + out_be32(®s->enable, MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET); + + free_irq(lpbfifo.irq, &pdev->dev); + irq_dispose_mapping(lpbfifo.irq); + dma_release_channel(lpbfifo.chan); +} + +static const struct of_device_id mpc512x_lpbfifo_match[] = { + { .compatible = "fsl,mpc512x-lpbfifo", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match); + +static struct platform_driver mpc512x_lpbfifo_driver = { + .probe = mpc512x_lpbfifo_probe, + .remove_new = mpc512x_lpbfifo_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = mpc512x_lpbfifo_match, + }, +}; + +module_platform_driver(mpc512x_lpbfifo_driver); + +MODULE_AUTHOR("Alexander Popov "); +MODULE_DESCRIPTION("MPC512x LocalPlus Bus FIFO device driver"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c new file mode 100644 index 0000000000..8f75e9574c --- /dev/null +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: John Rigby + * + * Description: + * MPC512x Shared code + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mpc512x.h" + +static struct mpc512x_reset_module __iomem *reset_module_base; + +void __noreturn mpc512x_restart(char *cmd) +{ + if (reset_module_base) { + /* Enable software reset "RSTE" */ + out_be32(&reset_module_base->rpr, 0x52535445); + /* Set software hard reset */ + out_be32(&reset_module_base->rcr, 0x2); + } else { + pr_err("Restart module not mapped.\n"); + } + for (;;) + ; +} + +struct fsl_diu_shared_fb { + u8 gamma[0x300]; /* 32-bit aligned! */ + struct diu_ad ad0; /* 32-bit aligned! */ + phys_addr_t fb_phys; + size_t fb_len; + bool in_use; +}; + +/* receives a pixel clock spec in pico seconds, adjusts the DIU clock rate */ +static void mpc512x_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *np; + struct clk *clk_diu; + unsigned long epsilon, minpixclock, maxpixclock; + unsigned long offset, want, got, delta; + + /* lookup and enable the DIU clock */ + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu"); + if (!np) { + pr_err("Could not find DIU device tree node.\n"); + return; + } + clk_diu = of_clk_get(np, 0); + if (IS_ERR(clk_diu)) { + /* backwards compat with device trees that lack clock specs */ + clk_diu = clk_get_sys(np->name, "ipg"); + } + of_node_put(np); + if (IS_ERR(clk_diu)) { + pr_err("Could not lookup DIU clock.\n"); + return; + } + if (clk_prepare_enable(clk_diu)) { + pr_err("Could not enable DIU clock.\n"); + return; + } + + /* + * convert the picoseconds spec into the desired clock rate, + * determine the acceptable clock range for the monitor (+/- 5%), + * do the calculation in steps to avoid integer overflow + */ + pr_debug("DIU pixclock in ps - %u\n", pixclock); + pixclock = (1000000000 / pixclock) * 1000; + pr_debug("DIU pixclock freq - %u\n", pixclock); + epsilon = pixclock / 20; /* pixclock * 0.05 */ + pr_debug("DIU deviation - %lu\n", epsilon); + minpixclock = pixclock - epsilon; + maxpixclock = pixclock + epsilon; + pr_debug("DIU minpixclock - %lu\n", minpixclock); + pr_debug("DIU maxpixclock - %lu\n", maxpixclock); + + /* + * check whether the DIU supports the desired pixel clock + * + * - simply request the desired clock and see what the + * platform's clock driver will make of it, assuming that it + * will setup the best approximation of the requested value + * - try other candidate frequencies in the order of decreasing + * preference (i.e. with increasing distance from the desired + * pixel clock, and checking the lower frequency before the + * higher frequency to not overload the hardware) until the + * first match is found -- any potential subsequent match + * would only be as good as the former match or typically + * would be less preferrable + * + * the offset increment of pixelclock divided by 64 is an + * arbitrary choice -- it's simple to calculate, in the typical + * case we expect the first check to succeed already, in the + * worst case seven frequencies get tested (the exact center and + * three more values each to the left and to the right) before + * the 5% tolerance window is exceeded, resulting in fast enough + * execution yet high enough probability of finding a suitable + * value, while the error rate will be in the order of single + * percents + */ + for (offset = 0; offset <= epsilon; offset += pixclock / 64) { + want = pixclock - offset; + pr_debug("DIU checking clock - %lu\n", want); + clk_set_rate(clk_diu, want); + got = clk_get_rate(clk_diu); + delta = abs(pixclock - got); + if (delta < epsilon) + break; + if (!offset) + continue; + want = pixclock + offset; + pr_debug("DIU checking clock - %lu\n", want); + clk_set_rate(clk_diu, want); + got = clk_get_rate(clk_diu); + delta = abs(pixclock - got); + if (delta < epsilon) + break; + } + if (offset <= epsilon) { + pr_debug("DIU clock accepted - %lu\n", want); + pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n", + pixclock, got, delta, epsilon); + return; + } + pr_warn("DIU pixclock auto search unsuccessful\n"); + + /* + * what is the most appropriate action to take when the search + * for an available pixel clock which is acceptable to the + * monitor has failed? disable the DIU (clock) or just provide + * a "best effort"? we go with the latter + */ + pr_warn("DIU pixclock best effort fallback (backend's choice)\n"); + clk_set_rate(clk_diu, pixclock); + got = clk_get_rate(clk_diu); + delta = abs(pixclock - got); + pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n", + pixclock, got, delta, epsilon); +} + +static enum fsl_diu_monitor_port +mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + return FSL_DIU_PORT_DVI; +} + +static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; + +static inline void mpc512x_free_bootmem(struct page *page) +{ + BUG_ON(PageTail(page)); + BUG_ON(page_ref_count(page) > 1); + free_reserved_page(page); +} + +static void mpc512x_release_bootmem(void) +{ + unsigned long addr = diu_shared_fb.fb_phys & PAGE_MASK; + unsigned long size = diu_shared_fb.fb_len; + unsigned long start, end; + + if (diu_shared_fb.in_use) { + start = PFN_UP(addr); + end = PFN_DOWN(addr + size); + + for (; start < end; start++) + mpc512x_free_bootmem(pfn_to_page(start)); + + diu_shared_fb.in_use = false; + } + diu_ops.release_bootmem = NULL; +} + +/* + * Check if DIU was pre-initialized. If so, perform steps + * needed to continue displaying through the whole boot process. + * Move area descriptor and gamma table elsewhere, they are + * destroyed by bootmem allocator otherwise. The frame buffer + * address range will be reserved in setup_arch() after bootmem + * allocator is up. + */ +static void __init mpc512x_init_diu(void) +{ + struct device_node *np; + struct diu __iomem *diu_reg; + phys_addr_t desc; + void __iomem *vaddr; + unsigned long mode, pix_fmt, res, bpp; + unsigned long dst; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu"); + if (!np) { + pr_err("No DIU node\n"); + return; + } + + diu_reg = of_iomap(np, 0); + of_node_put(np); + if (!diu_reg) { + pr_err("Can't map DIU\n"); + return; + } + + mode = in_be32(&diu_reg->diu_mode); + if (mode == MFB_MODE0) { + pr_info("%s: DIU OFF\n", __func__); + goto out; + } + + desc = in_be32(&diu_reg->desc[0]); + vaddr = ioremap(desc, sizeof(struct diu_ad)); + if (!vaddr) { + pr_err("Can't map DIU area desc.\n"); + goto out; + } + memcpy(&diu_shared_fb.ad0, vaddr, sizeof(struct diu_ad)); + /* flush fb area descriptor */ + dst = (unsigned long)&diu_shared_fb.ad0; + flush_dcache_range(dst, dst + sizeof(struct diu_ad) - 1); + + res = in_be32(&diu_reg->disp_size); + pix_fmt = in_le32(vaddr); + bpp = ((pix_fmt >> 16) & 0x3) + 1; + diu_shared_fb.fb_phys = in_le32(vaddr + 4); + diu_shared_fb.fb_len = ((res & 0xfff0000) >> 16) * (res & 0xfff) * bpp; + diu_shared_fb.in_use = true; + iounmap(vaddr); + + desc = in_be32(&diu_reg->gamma); + vaddr = ioremap(desc, sizeof(diu_shared_fb.gamma)); + if (!vaddr) { + pr_err("Can't map DIU area desc.\n"); + diu_shared_fb.in_use = false; + goto out; + } + memcpy(&diu_shared_fb.gamma, vaddr, sizeof(diu_shared_fb.gamma)); + /* flush gamma table */ + dst = (unsigned long)&diu_shared_fb.gamma; + flush_dcache_range(dst, dst + sizeof(diu_shared_fb.gamma) - 1); + + iounmap(vaddr); + out_be32(&diu_reg->gamma, virt_to_phys(&diu_shared_fb.gamma)); + out_be32(&diu_reg->desc[1], 0); + out_be32(&diu_reg->desc[2], 0); + out_be32(&diu_reg->desc[0], virt_to_phys(&diu_shared_fb.ad0)); + +out: + iounmap(diu_reg); +} + +static void __init mpc512x_setup_diu(void) +{ + int ret; + + /* + * We do not allocate and configure new area for bitmap buffer + * because it would require copying bitmap data (splash image) + * and so negatively affect boot time. Instead we reserve the + * already configured frame buffer area so that it won't be + * destroyed. The starting address of the area to reserve and + * also it's length is passed to memblock_reserve(). It will be + * freed later on first open of fbdev, when splash image is not + * needed any more. + */ + if (diu_shared_fb.in_use) { + ret = memblock_reserve(diu_shared_fb.fb_phys, + diu_shared_fb.fb_len); + if (ret) { + pr_err("%s: reserve bootmem failed\n", __func__); + diu_shared_fb.in_use = false; + } + } + + diu_ops.set_pixel_clock = mpc512x_set_pixel_clock; + diu_ops.valid_monitor_port = mpc512x_valid_monitor_port; + diu_ops.release_bootmem = mpc512x_release_bootmem; +} + +void __init mpc512x_init_IRQ(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-ipic"); + if (!np) + return; + + ipic_init(np, 0); + of_node_put(np); + + /* + * Initialize the default interrupt mapping priorities, + * in case the boot rom changed something on us. + */ + ipic_set_default_priority(); +} + +/* + * Nodes to do bus probe on, soc and localbus + */ +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "fsl,mpc5121-immr", }, + { .compatible = "fsl,mpc5121-localbus", }, + { .compatible = "fsl,mpc5121-mbx", }, + { .compatible = "fsl,mpc5121-nfc", }, + { .compatible = "fsl,mpc5121-sram", }, + { .compatible = "fsl,mpc5121-pci", }, + { .compatible = "gpio-leds", }, + {}, +}; + +static void __init mpc512x_declare_of_platform_devices(void) +{ + if (of_platform_bus_probe(NULL, of_bus_ids, NULL)) + printk(KERN_ERR __FILE__ ": " + "Error while probing of_platform bus\n"); +} + +#define DEFAULT_FIFO_SIZE 16 + +const char *__init mpc512x_select_psc_compat(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) + return "fsl,mpc5121-psc"; + + if (of_machine_is_compatible("fsl,mpc5125")) + return "fsl,mpc5125-psc"; + + return NULL; +} + +static const char *__init mpc512x_select_reset_compat(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) + return "fsl,mpc5121-reset"; + + if (of_machine_is_compatible("fsl,mpc5125")) + return "fsl,mpc5125-reset"; + + return NULL; +} + +static unsigned int __init get_fifo_size(struct device_node *np, + char *prop_name) +{ + const unsigned int *fp; + + fp = of_get_property(np, prop_name, NULL); + if (fp) + return *fp; + + pr_warn("no %s property in %pOF node, defaulting to %d\n", + prop_name, np, DEFAULT_FIFO_SIZE); + + return DEFAULT_FIFO_SIZE; +} + +#define FIFOC(_base) ((struct mpc512x_psc_fifo __iomem *) \ + ((u32)(_base) + sizeof(struct mpc52xx_psc))) + +/* Init PSC FIFO space for TX and RX slices */ +static void __init mpc512x_psc_fifo_init(void) +{ + struct device_node *np; + void __iomem *psc; + unsigned int tx_fifo_size; + unsigned int rx_fifo_size; + const char *psc_compat; + int fifobase = 0; /* current fifo address in 32 bit words */ + + psc_compat = mpc512x_select_psc_compat(); + if (!psc_compat) { + pr_err("%s: no compatible devices found\n", __func__); + return; + } + + for_each_compatible_node(np, NULL, psc_compat) { + tx_fifo_size = get_fifo_size(np, "fsl,tx-fifo-size"); + rx_fifo_size = get_fifo_size(np, "fsl,rx-fifo-size"); + + /* size in register is in 4 byte units */ + tx_fifo_size /= 4; + rx_fifo_size /= 4; + if (!tx_fifo_size) + tx_fifo_size = 1; + if (!rx_fifo_size) + rx_fifo_size = 1; + + psc = of_iomap(np, 0); + if (!psc) { + pr_err("%s: Can't map %pOF device\n", + __func__, np); + continue; + } + + /* FIFO space is 4KiB, check if requested size is available */ + if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) { + pr_err("%s: no fifo space available for %pOF\n", + __func__, np); + iounmap(psc); + /* + * chances are that another device requests less + * fifo space, so we continue. + */ + continue; + } + + /* set tx and rx fifo size registers */ + out_be32(&FIFOC(psc)->txsz, (fifobase << 16) | tx_fifo_size); + fifobase += tx_fifo_size; + out_be32(&FIFOC(psc)->rxsz, (fifobase << 16) | rx_fifo_size); + fifobase += rx_fifo_size; + + /* reset and enable the slices */ + out_be32(&FIFOC(psc)->txcmd, 0x80); + out_be32(&FIFOC(psc)->txcmd, 0x01); + out_be32(&FIFOC(psc)->rxcmd, 0x80); + out_be32(&FIFOC(psc)->rxcmd, 0x01); + + iounmap(psc); + } +} + +static void __init mpc512x_restart_init(void) +{ + struct device_node *np; + const char *reset_compat; + + reset_compat = mpc512x_select_reset_compat(); + np = of_find_compatible_node(NULL, NULL, reset_compat); + if (!np) + return; + + reset_module_base = of_iomap(np, 0); + of_node_put(np); +} + +void __init mpc512x_init_early(void) +{ + mpc512x_restart_init(); + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_init_diu(); +} + +void __init mpc512x_init(void) +{ + mpc5121_clk_init(); + mpc512x_declare_of_platform_devices(); + mpc512x_psc_fifo_init(); +} + +void __init mpc512x_setup_arch(void) +{ + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_setup_diu(); +} + +/** + * mpc512x_cs_config - Setup chip select configuration + * @cs: chip select number + * @val: chip select configuration value + * + * Perform chip select configuration for devices on LocalPlus Bus. + * Intended to dynamically reconfigure the chip select parameters + * for configurable devices on the bus. + */ +int mpc512x_cs_config(unsigned int cs, u32 val) +{ + static struct mpc512x_lpc __iomem *lpc; + struct device_node *np; + + if (cs > 7) + return -EINVAL; + + if (!lpc) { + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-lpc"); + lpc = of_iomap(np, 0); + of_node_put(np); + if (!lpc) + return -ENOMEM; + } + + out_be32(&lpc->cs_cfg[cs], val); + return 0; +} +EXPORT_SYMBOL(mpc512x_cs_config); diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c new file mode 100644 index 0000000000..ce51cfeeb0 --- /dev/null +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2010 DENX Software Engineering + * + * Anatolij Gustschin, + * + * PDM360NG board setup + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc512x.h" + +#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \ + defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) +#include +#include +#include +#include + +static void *pdm360ng_gpio_base; + +static int pdm360ng_get_pendown_state(void) +{ + u32 reg; + + reg = in_be32(pdm360ng_gpio_base + 0xc); + if (reg & 0x40) + setbits32(pdm360ng_gpio_base + 0xc, 0x40); + + reg = in_be32(pdm360ng_gpio_base + 0x8); + + /* return 1 if pen is down */ + return (reg & 0x40) == 0; +} + +static struct ads7846_platform_data pdm360ng_ads7846_pdata = { + .model = 7845, + .get_pendown_state = pdm360ng_get_pendown_state, + .irq_flags = IRQF_TRIGGER_LOW, +}; + +static int __init pdm360ng_penirq_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-gpio"); + if (!np) { + pr_err("%s: Can't find 'mpc5121-gpio' node\n", __func__); + return -ENODEV; + } + + pdm360ng_gpio_base = of_iomap(np, 0); + of_node_put(np); + if (!pdm360ng_gpio_base) { + pr_err("%s: Can't map gpio regs.\n", __func__); + return -ENODEV; + } + out_be32(pdm360ng_gpio_base + 0xc, 0xffffffff); + setbits32(pdm360ng_gpio_base + 0x18, 0x2000); + setbits32(pdm360ng_gpio_base + 0x10, 0x40); + + return 0; +} + +static int pdm360ng_touchscreen_notifier_call(struct notifier_block *nb, + unsigned long event, void *__dev) +{ + struct device *dev = __dev; + + if ((event == BUS_NOTIFY_ADD_DEVICE) && + of_device_is_compatible(dev->of_node, "ti,ads7846")) { + dev->platform_data = &pdm360ng_ads7846_pdata; + return NOTIFY_OK; + } + return NOTIFY_DONE; +} + +static struct notifier_block pdm360ng_touchscreen_nb = { + .notifier_call = pdm360ng_touchscreen_notifier_call, +}; + +static void __init pdm360ng_touchscreen_init(void) +{ + if (pdm360ng_penirq_init()) + return; + + bus_register_notifier(&spi_bus_type, &pdm360ng_touchscreen_nb); +} +#else +static inline void __init pdm360ng_touchscreen_init(void) +{ +} +#endif /* CONFIG_TOUCHSCREEN_ADS7846 */ + +void __init pdm360ng_init(void) +{ + mpc512x_init(); + pdm360ng_touchscreen_init(); +} + +static int __init pdm360ng_probe(void) +{ + mpc512x_init_early(); + + return 1; +} + +define_machine(pdm360ng) { + .name = "PDM360NG", + .compatible = "ifm,pdm360ng", + .probe = pdm360ng_probe, + .setup_arch = mpc512x_setup_arch, + .init = pdm360ng_init, + .init_IRQ = mpc512x_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc512x_restart, +}; diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig new file mode 100644 index 0000000000..384e4bef2c --- /dev/null +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MPC52xx + bool "52xx-based boards" + depends on PPC_BOOK3S_32 + select COMMON_CLK + select HAVE_PCI + +config PPC_MPC5200_SIMPLE + bool "Generic support for simple MPC5200 based boards" + depends on PPC_MPC52xx + select DEFAULT_UIMAGE + help + This option enables support for a simple MPC52xx based boards which + do not need a custom platform specific setup. Such boards are + supported assuming the following: + + - GPIO pins are configured by the firmware, + - CDM configuration (clocking) is setup correctly by firmware, + - if the 'fsl,has-wdt' property is present in one of the + gpt nodes, then it is safe to use such gpt to reset the board, + - PCI is supported if enabled in the kernel configuration + and if there is a PCI bus node defined in the device tree. + + Boards that are compatible with this generic platform support + are: + intercontrol,digsy-mtc + phytec,pcm030 + phytec,pcm032 + promess,motionpro + schindler,cm5200 + tqc,tqm5200 + +config PPC_EFIKA + bool "bPlan Efika 5k2. MPC5200B based computer" + depends on PPC_MPC52xx + select PPC_RTAS + select PPC_HASH_MMU_NATIVE + +config PPC_LITE5200 + bool "Freescale Lite5200 Eval Board" + depends on PPC_MPC52xx + select DEFAULT_UIMAGE + +config PPC_MEDIA5200 + bool "Freescale Media5200 Eval Board" + depends on PPC_MPC52xx + select DEFAULT_UIMAGE + +config PPC_MPC5200_BUGFIX + bool "MPC5200 (L25R) bugfix support" + depends on PPC_MPC52xx + help + Enable workarounds for original MPC5200 errata. This is not required + for MPC5200B based boards. + + It is safe to say 'Y' here diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile new file mode 100644 index 0000000000..1b1f72d833 --- /dev/null +++ b/arch/powerpc/platforms/52xx/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for 52xx based boards +# +obj-y += mpc52xx_pic.o mpc52xx_common.o mpc52xx_gpt.o +obj-$(CONFIG_PCI) += mpc52xx_pci.o + +obj-$(CONFIG_PPC_MPC5200_SIMPLE) += mpc5200_simple.o +obj-$(CONFIG_PPC_EFIKA) += efika.o +obj-$(CONFIG_PPC_LITE5200) += lite5200.o +obj-$(CONFIG_PPC_MEDIA5200) += media5200.o + +obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o +ifdef CONFIG_PPC_LITE5200 + obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o +endif diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c new file mode 100644 index 0000000000..aa82e6b437 --- /dev/null +++ b/arch/powerpc/platforms/52xx/efika.c @@ -0,0 +1,233 @@ +/* + * Efika 5K2 platform code + * Some code really inspired from the lite5200b platform. + * + * Copyright (C) 2006 bplan GmbH + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EFIKA_PLATFORM_NAME "Efika" + + +/* ------------------------------------------------------------------------ */ +/* PCI accesses thru RTAS */ +/* ------------------------------------------------------------------------ */ + +#ifdef CONFIG_PCI + +/* + * Access functions for PCI config space using RTAS calls. + */ +static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 * val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int ret = -1; + int rval; + + rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len); + *val = ret; + return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; +} + +static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int rval; + + rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL, + addr, len, val); + return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops rtas_pci_ops = { + .read = rtas_read_config, + .write = rtas_write_config, +}; + + +static void __init efika_pcisetup(void) +{ + const int *bus_range; + int len; + struct pci_controller *hose; + struct device_node *root; + struct device_node *pcictrl; + + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Unable to find the root node\n"); + return; + } + + for_each_child_of_node(root, pcictrl) + if (of_node_name_eq(pcictrl, "pci")) + break; + + of_node_put(root); + + if (pcictrl == NULL) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Unable to find the PCI bridge node\n"); + return; + } + + bus_range = of_get_property(pcictrl, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Can't get bus-range for %pOF\n", pcictrl); + goto out_put; + } + + if (bus_range[1] == bus_range[0]) + printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI bus %d", + bus_range[0]); + else + printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI buses %d..%d", + bus_range[0], bus_range[1]); + printk(" controlled by %pOF\n", pcictrl); + printk("\n"); + + hose = pcibios_alloc_controller(pcictrl); + if (!hose) { + printk(KERN_WARNING EFIKA_PLATFORM_NAME + ": Can't allocate PCI controller structure for %pOF\n", + pcictrl); + goto out_put; + } + + hose->first_busno = bus_range[0]; + hose->last_busno = bus_range[1]; + hose->ops = &rtas_pci_ops; + + pci_process_bridge_OF_ranges(hose, pcictrl, 0); + return; +out_put: + of_node_put(pcictrl); +} + +#else +static void __init efika_pcisetup(void) +{} +#endif + + + +/* ------------------------------------------------------------------------ */ +/* Platform setup */ +/* ------------------------------------------------------------------------ */ + +static void efika_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *revision; + const char *codegendescription; + const char *codegenvendor; + + root = of_find_node_by_path("/"); + if (!root) + return; + + revision = of_get_property(root, "revision", NULL); + codegendescription = of_get_property(root, "CODEGEN,description", NULL); + codegenvendor = of_get_property(root, "CODEGEN,vendor", NULL); + + if (codegendescription) + seq_printf(m, "machine\t\t: %s\n", codegendescription); + else + seq_printf(m, "machine\t\t: Efika\n"); + + if (revision) + seq_printf(m, "revision\t: %s\n", revision); + + if (codegenvendor) + seq_printf(m, "vendor\t\t: %s\n", codegenvendor); + + of_node_put(root); +} + +#ifdef CONFIG_PM +static void efika_suspend_prepare(void __iomem *mbar) +{ + u8 pin = 4; /* GPIO_WKUP_4 (GPIO_PSC6_0 - IRDA_RX) */ + u8 level = 1; /* wakeup on high level */ + /* IOW. to wake it up, short pins 1 and 3 on IRDA connector */ + mpc52xx_set_wakeup_gpio(pin, level); +} +#endif + +static void __init efika_setup_arch(void) +{ + rtas_initialize(); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + +#ifdef CONFIG_PM + mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare; + mpc52xx_pm_init(); +#endif + + if (ppc_md.progress) + ppc_md.progress("Linux/PPC " UTS_RELEASE " running on Efika ;-)\n", 0x0); +} + +static int __init efika_probe(void) +{ + const char *model = of_get_property(of_root, "model", NULL); + + if (model == NULL) + return 0; + if (strcmp(model, "EFIKA5K2")) + return 0; + + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(efika) +{ + .name = EFIKA_PLATFORM_NAME, + .probe = efika_probe, + .setup_arch = efika_setup_arch, + .discover_phbs = efika_pcisetup, + .init = mpc52xx_declare_of_platform_devices, + .show_cpuinfo = efika_show_cpuinfo, + .init_IRQ = mpc52xx_init_irq, + .get_irq = mpc52xx_get_irq, + .restart = rtas_restart, + .halt = rtas_halt, + .set_rtc_time = rtas_set_rtc_time, + .get_rtc_time = rtas_get_rtc_time, + .progress = rtas_progress, + .get_boot_time = rtas_get_boot_time, +#ifdef CONFIG_PCI + .phys_mem_access_prot = pci_phys_mem_access_prot, +#endif +}; + diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c new file mode 100644 index 0000000000..0fd67b3ffc --- /dev/null +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale Lite5200 board support + * + * Written by: Grant Likely + * + * Copyright (C) Secret Lab Technologies Ltd. 2006. All rights reserved. + * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved. + * + * Description: + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* ************************************************************************ + * + * Setup the architecture + * + */ + +/* mpc5200 device tree match tables */ +static const struct of_device_id mpc5200_cdm_ids[] __initconst = { + { .compatible = "fsl,mpc5200-cdm", }, + { .compatible = "mpc5200-cdm", }, + {} +}; + +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { + { .compatible = "fsl,mpc5200-gpio", }, + { .compatible = "mpc5200-gpio", }, + {} +}; + +/* + * Fix clock configuration. + * + * Firmware is supposed to be responsible for this. If you are creating a + * new board port, do *NOT* duplicate this code. Fix your boot firmware + * to set it correctly in the first place + */ +static void __init +lite5200_fix_clock_config(void) +{ + struct device_node *np; + struct mpc52xx_cdm __iomem *cdm; + /* Map zones */ + np = of_find_matching_node(NULL, mpc5200_cdm_ids); + cdm = of_iomap(np, 0); + of_node_put(np); + if (!cdm) { + printk(KERN_ERR "%s() failed; expect abnormal behaviour\n", + __func__); + return; + } + + /* Use internal 48 Mhz */ + out_8(&cdm->ext_48mhz_en, 0x00); + out_8(&cdm->fd_enable, 0x01); + if (in_be32(&cdm->rstcfg) & 0x40) /* Assumes 33Mhz clock */ + out_be16(&cdm->fd_counters, 0x0001); + else + out_be16(&cdm->fd_counters, 0x5555); + + /* Unmap the regs */ + iounmap(cdm); +} + +/* + * Fix setting of port_config register. + * + * Firmware is supposed to be responsible for this. If you are creating a + * new board port, do *NOT* duplicate this code. Fix your boot firmware + * to set it correctly in the first place + */ +static void __init +lite5200_fix_port_config(void) +{ + struct device_node *np; + struct mpc52xx_gpio __iomem *gpio; + u32 port_config; + + np = of_find_matching_node(NULL, mpc5200_gpio_ids); + gpio = of_iomap(np, 0); + of_node_put(np); + if (!gpio) { + printk(KERN_ERR "%s() failed. expect abnormal behavior\n", + __func__); + return; + } + + /* Set port config */ + port_config = in_be32(&gpio->port_config); + + port_config &= ~0x00800000; /* 48Mhz internal, pin is GPIO */ + + port_config &= ~0x00007000; /* USB port : Differential mode */ + port_config |= 0x00001000; /* USB 1 only */ + + port_config &= ~0x03000000; /* ATA CS is on csb_4/5 */ + port_config |= 0x01000000; + + pr_debug("port_config: old:%x new:%x\n", + in_be32(&gpio->port_config), port_config); + out_be32(&gpio->port_config, port_config); + + /* Unmap zone */ + iounmap(gpio); +} + +#ifdef CONFIG_PM +static void lite5200_suspend_prepare(void __iomem *mbar) +{ + u8 pin = 1; /* GPIO_WKUP_1 (GPIO_PSC2_4) */ + u8 level = 0; /* wakeup on low level */ + mpc52xx_set_wakeup_gpio(pin, level); + + /* + * power down usb port + * this needs to be called before of-ohci suspend code + */ + + /* set ports to "power switched" and "powered at the same time" + * USB Rh descriptor A: NPS = 0, PSM = 0 */ + out_be32(mbar + 0x1048, in_be32(mbar + 0x1048) & ~0x300); + /* USB Rh status: LPS = 1 - turn off power */ + out_be32(mbar + 0x1050, 0x00000001); +} + +static void lite5200_resume_finish(void __iomem *mbar) +{ + /* USB Rh status: LPSC = 1 - turn on power */ + out_be32(mbar + 0x1050, 0x00010000); +} +#endif + +static void __init lite5200_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("lite5200_setup_arch()", 0); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + + /* Some mpc5200 & mpc5200b related configuration */ + mpc5200_setup_xlb_arbiter(); + + /* Fix things that firmware should have done. */ + lite5200_fix_clock_config(); + lite5200_fix_port_config(); + +#ifdef CONFIG_PM + mpc52xx_suspend.board_suspend_prepare = lite5200_suspend_prepare; + mpc52xx_suspend.board_resume_finish = lite5200_resume_finish; + lite5200_pm_init(); +#endif +} + +static const char * const board[] __initconst = { + "fsl,lite5200", + "fsl,lite5200b", + NULL, +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init lite5200_probe(void) +{ + return of_device_compatible_match(of_root, board); +} + +define_machine(lite5200) { + .name = "lite5200", + .probe = lite5200_probe, + .setup_arch = lite5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, + .init = mpc52xx_declare_of_platform_devices, + .init_IRQ = mpc52xx_init_irq, + .get_irq = mpc52xx_get_irq, + .restart = mpc52xx_restart, +}; diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c new file mode 100644 index 0000000000..4900f5f48c --- /dev/null +++ b/arch/powerpc/platforms/52xx/lite5200_pm.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include +#include +#include +#include + +/* defined in lite5200_sleep.S and only used here */ +extern void lite5200_low_power(void __iomem *sram, void __iomem *mbar); + +static struct mpc52xx_cdm __iomem *cdm; +static struct mpc52xx_intr __iomem *pic; +static struct mpc52xx_sdma __iomem *bes; +static struct mpc52xx_xlb __iomem *xlb; +static struct mpc52xx_gpio __iomem *gps; +static struct mpc52xx_gpio_wkup __iomem *gpw; +static void __iomem *pci; +static void __iomem *sram; +static const int sram_size = 0x4000; /* 16 kBytes */ +static void __iomem *mbar; + +static suspend_state_t lite5200_pm_target_state; + +static int lite5200_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +static int lite5200_pm_begin(suspend_state_t state) +{ + if (lite5200_pm_valid(state)) { + lite5200_pm_target_state = state; + return 0; + } + return -EINVAL; +} + +static int lite5200_pm_prepare(void) +{ + struct device_node *np; + static const struct of_device_id immr_ids[] = { + { .compatible = "fsl,mpc5200-immr", }, + { .compatible = "fsl,mpc5200b-immr", }, + { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */ + { .type = "builtin", .compatible = "mpc5200", }, /* efika */ + {} + }; + struct resource res; + + /* deep sleep? let mpc52xx code handle that */ + if (lite5200_pm_target_state == PM_SUSPEND_STANDBY) + return mpc52xx_pm_prepare(); + + if (lite5200_pm_target_state != PM_SUSPEND_MEM) + return -EINVAL; + + /* map registers */ + np = of_find_matching_node(NULL, immr_ids); + of_address_to_resource(np, 0, &res); + of_node_put(np); + + mbar = ioremap(res.start, 0xC000); + if (!mbar) { + printk(KERN_ERR "%s:%i Error mapping registers\n", __func__, __LINE__); + return -ENOSYS; + } + + cdm = mbar + 0x200; + pic = mbar + 0x500; + gps = mbar + 0xb00; + gpw = mbar + 0xc00; + pci = mbar + 0xd00; + bes = mbar + 0x1200; + xlb = mbar + 0x1f00; + sram = mbar + 0x8000; + + return 0; +} + +/* save and restore registers not bound to any real devices */ +static struct mpc52xx_cdm scdm; +static struct mpc52xx_intr spic; +static struct mpc52xx_sdma sbes; +static struct mpc52xx_xlb sxlb; +static struct mpc52xx_gpio sgps; +static struct mpc52xx_gpio_wkup sgpw; +static char spci[0x200]; + +static void lite5200_save_regs(void) +{ + _memcpy_fromio(&spic, pic, sizeof(*pic)); + _memcpy_fromio(&sbes, bes, sizeof(*bes)); + _memcpy_fromio(&scdm, cdm, sizeof(*cdm)); + _memcpy_fromio(&sxlb, xlb, sizeof(*xlb)); + _memcpy_fromio(&sgps, gps, sizeof(*gps)); + _memcpy_fromio(&sgpw, gpw, sizeof(*gpw)); + _memcpy_fromio(spci, pci, 0x200); + + _memcpy_fromio(saved_sram, sram, sram_size); +} + +static void lite5200_restore_regs(void) +{ + int i; + _memcpy_toio(sram, saved_sram, sram_size); + + /* PCI Configuration */ + _memcpy_toio(pci, spci, 0x200); + + /* + * GPIOs. Interrupt Master Enable has higher address then other + * registers, so just memcpy is ok. + */ + _memcpy_toio(gpw, &sgpw, sizeof(*gpw)); + _memcpy_toio(gps, &sgps, sizeof(*gps)); + + + /* XLB Arbitrer */ + out_be32(&xlb->snoop_window, sxlb.snoop_window); + out_be32(&xlb->master_priority, sxlb.master_priority); + out_be32(&xlb->master_pri_enable, sxlb.master_pri_enable); + + /* enable */ + out_be32(&xlb->int_enable, sxlb.int_enable); + out_be32(&xlb->config, sxlb.config); + + + /* CDM - Clock Distribution Module */ + out_8(&cdm->ipb_clk_sel, scdm.ipb_clk_sel); + out_8(&cdm->pci_clk_sel, scdm.pci_clk_sel); + + out_8(&cdm->ext_48mhz_en, scdm.ext_48mhz_en); + out_8(&cdm->fd_enable, scdm.fd_enable); + out_be16(&cdm->fd_counters, scdm.fd_counters); + + out_be32(&cdm->clk_enables, scdm.clk_enables); + + out_8(&cdm->osc_disable, scdm.osc_disable); + + out_be16(&cdm->mclken_div_psc1, scdm.mclken_div_psc1); + out_be16(&cdm->mclken_div_psc2, scdm.mclken_div_psc2); + out_be16(&cdm->mclken_div_psc3, scdm.mclken_div_psc3); + out_be16(&cdm->mclken_div_psc6, scdm.mclken_div_psc6); + + + /* BESTCOMM */ + out_be32(&bes->taskBar, sbes.taskBar); + out_be32(&bes->currentPointer, sbes.currentPointer); + out_be32(&bes->endPointer, sbes.endPointer); + out_be32(&bes->variablePointer, sbes.variablePointer); + + out_8(&bes->IntVect1, sbes.IntVect1); + out_8(&bes->IntVect2, sbes.IntVect2); + out_be16(&bes->PtdCntrl, sbes.PtdCntrl); + + for (i=0; i<32; i++) + out_8(&bes->ipr[i], sbes.ipr[i]); + + out_be32(&bes->cReqSelect, sbes.cReqSelect); + out_be32(&bes->task_size0, sbes.task_size0); + out_be32(&bes->task_size1, sbes.task_size1); + out_be32(&bes->MDEDebug, sbes.MDEDebug); + out_be32(&bes->ADSDebug, sbes.ADSDebug); + out_be32(&bes->Value1, sbes.Value1); + out_be32(&bes->Value2, sbes.Value2); + out_be32(&bes->Control, sbes.Control); + out_be32(&bes->Status, sbes.Status); + out_be32(&bes->PTDDebug, sbes.PTDDebug); + + /* restore tasks */ + for (i=0; i<16; i++) + out_be16(&bes->tcr[i], sbes.tcr[i]); + + /* enable interrupts */ + out_be32(&bes->IntPend, sbes.IntPend); + out_be32(&bes->IntMask, sbes.IntMask); + + + /* PIC */ + out_be32(&pic->per_pri1, spic.per_pri1); + out_be32(&pic->per_pri2, spic.per_pri2); + out_be32(&pic->per_pri3, spic.per_pri3); + + out_be32(&pic->main_pri1, spic.main_pri1); + out_be32(&pic->main_pri2, spic.main_pri2); + + out_be32(&pic->enc_status, spic.enc_status); + + /* unmask and enable interrupts */ + out_be32(&pic->per_mask, spic.per_mask); + out_be32(&pic->main_mask, spic.main_mask); + out_be32(&pic->ctrl, spic.ctrl); +} + +static int lite5200_pm_enter(suspend_state_t state) +{ + /* deep sleep? let mpc52xx code handle that */ + if (state == PM_SUSPEND_STANDBY) { + return mpc52xx_pm_enter(state); + } + + lite5200_save_regs(); + + /* effectively save FP regs */ + enable_kernel_fp(); + + lite5200_low_power(sram, mbar); + + lite5200_restore_regs(); + + iounmap(mbar); + return 0; +} + +static void lite5200_pm_finish(void) +{ + /* deep sleep? let mpc52xx code handle that */ + if (lite5200_pm_target_state == PM_SUSPEND_STANDBY) + mpc52xx_pm_finish(); +} + +static void lite5200_pm_end(void) +{ + lite5200_pm_target_state = PM_SUSPEND_ON; +} + +static const struct platform_suspend_ops lite5200_pm_ops = { + .valid = lite5200_pm_valid, + .begin = lite5200_pm_begin, + .prepare = lite5200_pm_prepare, + .enter = lite5200_pm_enter, + .finish = lite5200_pm_finish, + .end = lite5200_pm_end, +}; + +int __init lite5200_pm_init(void) +{ + suspend_set_ops(&lite5200_pm_ops); + return 0; +} diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S new file mode 100644 index 0000000000..0b12647e7b --- /dev/null +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -0,0 +1,422 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +#include +#include +#include +#include + + +#define SDRAM_CTRL 0x104 +#define SC_MODE_EN (1<<31) +#define SC_CKE (1<<30) +#define SC_REF_EN (1<<28) +#define SC_SOFT_PRE (1<<1) + +#define GPIOW_GPIOE 0xc00 +#define GPIOW_DDR 0xc08 +#define GPIOW_DVO 0xc0c + +#define CDM_CE 0x214 +#define CDM_SDRAM (1<<3) + + +/* helpers... beware: r10 and r4 are overwritten */ +#define SAVE_SPRN(reg, addr) \ + mfspr r10, SPRN_##reg; \ + stw r10, ((addr)*4)(r4); + +#define LOAD_SPRN(reg, addr) \ + lwz r10, ((addr)*4)(r4); \ + mtspr SPRN_##reg, r10; \ + sync; \ + isync; + + + .data +registers: + .space 0x5c*4 + .text + +/* ---------------------------------------------------------------------- */ +/* low-power mode with help of M68HLC908QT1 */ + + .globl lite5200_low_power +lite5200_low_power: + + mr r7, r3 /* save SRAM va */ + mr r8, r4 /* save MBAR va */ + + /* setup wakeup address for u-boot at physical location 0x0 */ + lis r3, CONFIG_KERNEL_START@h + lis r4, lite5200_wakeup@h + ori r4, r4, lite5200_wakeup@l + sub r4, r4, r3 + stw r4, 0(r3) + + + /* + * save stuff BDI overwrites + * 0xf0 (0xe0->0x100 gets overwritten when BDI connected; + * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?)) + * WARNING: self-refresh doesn't seem to work when BDI2000 is connected, + * possibly because BDI sets SDRAM registers before wakeup code does + */ + lis r4, registers@h + ori r4, r4, registers@l + lwz r10, 0xf0(r3) + stw r10, (0x1d*4)(r4) + + /* save registers to r4 [destroys r10] */ + SAVE_SPRN(LR, 0x1c) + bl save_regs + + /* flush caches [destroys r3, r4] */ + bl flush_data_cache + + + /* copy code to sram */ + mr r4, r7 + li r3, (sram_code_end - sram_code)/4 + mtctr r3 + lis r3, sram_code@h + ori r3, r3, sram_code@l +1: + lwz r5, 0(r3) + stw r5, 0(r4) + addi r3, r3, 4 + addi r4, r4, 4 + bdnz 1b + + /* get tb_ticks_per_usec */ + lis r3, tb_ticks_per_usec@h + lwz r11, tb_ticks_per_usec@l(r3) + + /* disable I and D caches */ + mfspr r3, SPRN_HID0 + ori r3, r3, HID0_ICE | HID0_DCE + xori r3, r3, HID0_ICE | HID0_DCE + sync; isync; + mtspr SPRN_HID0, r3 + sync; isync; + + /* jump to sram */ + mtlr r7 + blrl + /* doesn't return */ + + +sram_code: + /* self refresh */ + lwz r4, SDRAM_CTRL(r8) + + /* send NOP (precharge) */ + oris r4, r4, SC_MODE_EN@h /* mode_en */ + stw r4, SDRAM_CTRL(r8) + sync + + ori r4, r4, SC_SOFT_PRE /* soft_pre */ + stw r4, SDRAM_CTRL(r8) + sync + xori r4, r4, SC_SOFT_PRE + + xoris r4, r4, SC_MODE_EN@h /* !mode_en */ + stw r4, SDRAM_CTRL(r8) + sync + + /* delay (for NOP to finish) */ + li r12, 1 + bl udelay + + /* + * mode_en must not be set when enabling self-refresh + * send AR with CKE low (self-refresh) + */ + oris r4, r4, (SC_REF_EN | SC_CKE)@h + xoris r4, r4, (SC_CKE)@h /* ref_en !cke */ + stw r4, SDRAM_CTRL(r8) + sync + + /* delay (after !CKE there should be two cycles) */ + li r12, 1 + bl udelay + + /* disable clock */ + lwz r4, CDM_CE(r8) + ori r4, r4, CDM_SDRAM + xori r4, r4, CDM_SDRAM + stw r4, CDM_CE(r8) + sync + + /* delay a bit */ + li r12, 1 + bl udelay + + + /* turn off with QT chip */ + li r4, 0x02 + stb r4, GPIOW_GPIOE(r8) /* enable gpio_wkup1 */ + sync + + stb r4, GPIOW_DVO(r8) /* "output" high */ + sync + stb r4, GPIOW_DDR(r8) /* output */ + sync + stb r4, GPIOW_DVO(r8) /* output high */ + sync + + /* 10uS delay */ + li r12, 10 + bl udelay + + /* turn off */ + li r4, 0 + stb r4, GPIOW_DVO(r8) /* output low */ + sync + + /* wait until we're offline */ + 1: + b 1b + + + /* local udelay in sram is needed */ +SYM_FUNC_START_LOCAL(udelay) + /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ + mullw r12, r12, r11 + mftb r13 /* start */ + add r12, r13, r12 /* end */ + 1: + mftb r13 /* current */ + cmp cr0, r13, r12 + blt 1b + blr +SYM_FUNC_END(udelay) + +sram_code_end: + + + +/* uboot jumps here on resume */ +lite5200_wakeup: + bl restore_regs + + + /* HIDs, MSR */ + LOAD_SPRN(HID1, 0x19) + LOAD_SPRN(HID2, 0x1a) + + + /* address translation is tricky (see turn_on_mmu) */ + mfmsr r10 + ori r10, r10, MSR_DR | MSR_IR + + + mtspr SPRN_SRR1, r10 + lis r10, mmu_on@h + ori r10, r10, mmu_on@l + mtspr SPRN_SRR0, r10 + sync + rfi +mmu_on: + /* kernel offset (r4 is still set from restore_registers) */ + addis r4, r4, CONFIG_KERNEL_START@h + + + /* restore MSR */ + lwz r10, (4*0x1b)(r4) + mtmsr r10 + sync; isync; + + /* invalidate caches */ + mfspr r10, SPRN_HID0 + ori r5, r10, HID0_ICFI | HID0_DCI + mtspr SPRN_HID0, r5 /* invalidate caches */ + sync; isync; + mtspr SPRN_HID0, r10 + sync; isync; + + /* enable caches */ + lwz r10, (4*0x18)(r4) + mtspr SPRN_HID0, r10 /* restore (enable caches, DPM) */ + /* ^ this has to be after address translation set in MSR */ + sync + isync + + + /* restore 0xf0 (BDI2000) */ + lis r3, CONFIG_KERNEL_START@h + lwz r10, (0x1d*4)(r4) + stw r10, 0xf0(r3) + + LOAD_SPRN(LR, 0x1c) + + + blr +_ASM_NOKPROBE_SYMBOL(lite5200_wakeup) + + +/* ---------------------------------------------------------------------- */ +/* boring code: helpers */ + +/* save registers */ +#define SAVE_BAT(n, addr) \ + SAVE_SPRN(DBAT##n##L, addr); \ + SAVE_SPRN(DBAT##n##U, addr+1); \ + SAVE_SPRN(IBAT##n##L, addr+2); \ + SAVE_SPRN(IBAT##n##U, addr+3); + +#define SAVE_SR(n, addr) \ + mfsr r10, n; \ + stw r10, ((addr)*4)(r4); + +#define SAVE_4SR(n, addr) \ + SAVE_SR(n, addr); \ + SAVE_SR(n+1, addr+1); \ + SAVE_SR(n+2, addr+2); \ + SAVE_SR(n+3, addr+3); + +SYM_FUNC_START_LOCAL(save_regs) + stw r0, 0(r4) + stw r1, 0x4(r4) + stw r2, 0x8(r4) + stmw r11, 0xc(r4) /* 0xc -> 0x5f, (0x18*4-1) */ + + SAVE_SPRN(HID0, 0x18) + SAVE_SPRN(HID1, 0x19) + SAVE_SPRN(HID2, 0x1a) + mfmsr r10 + stw r10, (4*0x1b)(r4) + /*SAVE_SPRN(LR, 0x1c) have to save it before the call */ + /* 0x1d reserved by 0xf0 */ + SAVE_SPRN(RPA, 0x1e) + SAVE_SPRN(SDR1, 0x1f) + + /* save MMU regs */ + SAVE_BAT(0, 0x20) + SAVE_BAT(1, 0x24) + SAVE_BAT(2, 0x28) + SAVE_BAT(3, 0x2c) + SAVE_BAT(4, 0x30) + SAVE_BAT(5, 0x34) + SAVE_BAT(6, 0x38) + SAVE_BAT(7, 0x3c) + + SAVE_4SR(0, 0x40) + SAVE_4SR(4, 0x44) + SAVE_4SR(8, 0x48) + SAVE_4SR(12, 0x4c) + + SAVE_SPRN(SPRG0, 0x50) + SAVE_SPRN(SPRG1, 0x51) + SAVE_SPRN(SPRG2, 0x52) + SAVE_SPRN(SPRG3, 0x53) + SAVE_SPRN(SPRG4, 0x54) + SAVE_SPRN(SPRG5, 0x55) + SAVE_SPRN(SPRG6, 0x56) + SAVE_SPRN(SPRG7, 0x57) + + SAVE_SPRN(IABR, 0x58) + SAVE_SPRN(DABR, 0x59) + SAVE_SPRN(TBRL, 0x5a) + SAVE_SPRN(TBRU, 0x5b) + + blr +SYM_FUNC_END(save_regs) + + +/* restore registers */ +#define LOAD_BAT(n, addr) \ + LOAD_SPRN(DBAT##n##L, addr); \ + LOAD_SPRN(DBAT##n##U, addr+1); \ + LOAD_SPRN(IBAT##n##L, addr+2); \ + LOAD_SPRN(IBAT##n##U, addr+3); + +#define LOAD_SR(n, addr) \ + lwz r10, ((addr)*4)(r4); \ + mtsr n, r10; + +#define LOAD_4SR(n, addr) \ + LOAD_SR(n, addr); \ + LOAD_SR(n+1, addr+1); \ + LOAD_SR(n+2, addr+2); \ + LOAD_SR(n+3, addr+3); + +SYM_FUNC_START_LOCAL(restore_regs) + lis r4, registers@h + ori r4, r4, registers@l + + /* MMU is not up yet */ + subis r4, r4, CONFIG_KERNEL_START@h + + lwz r0, 0(r4) + lwz r1, 0x4(r4) + lwz r2, 0x8(r4) + lmw r11, 0xc(r4) + + /* + * these are a bit tricky + * + * 0x18 - HID0 + * 0x19 - HID1 + * 0x1a - HID2 + * 0x1b - MSR + * 0x1c - LR + * 0x1d - reserved by 0xf0 (BDI2000) + */ + LOAD_SPRN(RPA, 0x1e); + LOAD_SPRN(SDR1, 0x1f); + + /* restore MMU regs */ + LOAD_BAT(0, 0x20) + LOAD_BAT(1, 0x24) + LOAD_BAT(2, 0x28) + LOAD_BAT(3, 0x2c) + LOAD_BAT(4, 0x30) + LOAD_BAT(5, 0x34) + LOAD_BAT(6, 0x38) + LOAD_BAT(7, 0x3c) + + LOAD_4SR(0, 0x40) + LOAD_4SR(4, 0x44) + LOAD_4SR(8, 0x48) + LOAD_4SR(12, 0x4c) + + /* rest of regs */ + LOAD_SPRN(SPRG0, 0x50); + LOAD_SPRN(SPRG1, 0x51); + LOAD_SPRN(SPRG2, 0x52); + LOAD_SPRN(SPRG3, 0x53); + LOAD_SPRN(SPRG4, 0x54); + LOAD_SPRN(SPRG5, 0x55); + LOAD_SPRN(SPRG6, 0x56); + LOAD_SPRN(SPRG7, 0x57); + + LOAD_SPRN(IABR, 0x58); + LOAD_SPRN(DABR, 0x59); + LOAD_SPRN(TBWL, 0x5a); /* these two have separate R/W regs */ + LOAD_SPRN(TBWU, 0x5b); + + blr +_ASM_NOKPROBE_SYMBOL(restore_regs) +SYM_FUNC_END(restore_regs) + + + +/* cache flushing code. copied from arch/ppc/boot/util.S */ +#define NUM_CACHE_LINES (128*8) + +/* + * Flush data cache + * Do this by just reading lots of stuff into the cache. + */ +SYM_FUNC_START_LOCAL(flush_data_cache) + lis r3,CONFIG_KERNEL_START@h + ori r3,r3,CONFIG_KERNEL_START@l + li r4,NUM_CACHE_LINES + mtctr r4 +1: + lwz r4,0(r3) + addi r3,r3,L1_CACHE_BYTES /* Next line, please */ + bdnz 1b + blr +SYM_FUNC_END(flush_data_cache) diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c new file mode 100644 index 0000000000..19626cd424 --- /dev/null +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support for 'media5200-platform' compatible boards. + * + * Copyright (C) 2008 Secret Lab Technologies Ltd. + * + * Description: + * This code implements support for the Freescape Media5200 platform + * (built around the MPC5200 SoC). + * + * Notable characteristic of the Media5200 is the presence of an FPGA + * that has all external IRQ lines routed through it. This file implements + * a cascaded interrupt controller driver which attaches itself to the + * Virtual IRQ subsystem after the primary mpc5200 interrupt controller + * is initialized. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct of_device_id mpc5200_gpio_ids[] __initconst = { + { .compatible = "fsl,mpc5200-gpio", }, + { .compatible = "mpc5200-gpio", }, + {} +}; + +/* FPGA register set */ +#define MEDIA5200_IRQ_ENABLE (0x40c) +#define MEDIA5200_IRQ_STATUS (0x410) +#define MEDIA5200_NUM_IRQS (6) +#define MEDIA5200_IRQ_SHIFT (32 - MEDIA5200_NUM_IRQS) + +struct media5200_irq { + void __iomem *regs; + spinlock_t lock; + struct irq_domain *irqhost; +}; +struct media5200_irq media5200_irq; + +static void media5200_irq_unmask(struct irq_data *d) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&media5200_irq.lock, flags); + val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); + val |= 1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)); + out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); + spin_unlock_irqrestore(&media5200_irq.lock, flags); +} + +static void media5200_irq_mask(struct irq_data *d) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&media5200_irq.lock, flags); + val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); + val &= ~(1 << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d))); + out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val); + spin_unlock_irqrestore(&media5200_irq.lock, flags); +} + +static struct irq_chip media5200_irq_chip = { + .name = "Media5200 FPGA", + .irq_unmask = media5200_irq_unmask, + .irq_mask = media5200_irq_mask, + .irq_mask_ack = media5200_irq_mask, +}; + +static void media5200_irq_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + int val; + u32 status, enable; + + /* Mask off the cascaded IRQ */ + raw_spin_lock(&desc->lock); + chip->irq_mask(&desc->irq_data); + raw_spin_unlock(&desc->lock); + + /* Ask the FPGA for IRQ status. If 'val' is 0, then no irqs + * are pending. 'ffs()' is 1 based */ + status = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE); + enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS); + val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT); + if (val) { + generic_handle_domain_irq(media5200_irq.irqhost, val - 1); + /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i\n", + * __func__, virq, status, enable, val - 1); + */ + } + + /* Processing done; can reenable the cascade now */ + raw_spin_lock(&desc->lock); + chip->irq_ack(&desc->irq_data); + if (!irqd_irq_disabled(&desc->irq_data)) + chip->irq_unmask(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} + +static int media5200_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + pr_debug("%s: h=%p, virq=%i, hwirq=%i\n", __func__, h, virq, (int)hw); + irq_set_chip_data(virq, &media5200_irq); + irq_set_chip_and_handler(virq, &media5200_irq_chip, handle_level_irq); + irq_set_status_flags(virq, IRQ_LEVEL); + return 0; +} + +static int media5200_irq_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + if (intsize != 2) + return -1; + + pr_debug("%s: bank=%i, number=%i\n", __func__, intspec[0], intspec[1]); + *out_hwirq = intspec[1]; + *out_flags = IRQ_TYPE_NONE; + return 0; +} + +static const struct irq_domain_ops media5200_irq_ops = { + .map = media5200_irq_map, + .xlate = media5200_irq_xlate, +}; + +/* + * Setup Media5200 IRQ mapping + */ +static void __init media5200_init_irq(void) +{ + struct device_node *fpga_np; + int cascade_virq; + + /* First setup the regular MPC5200 interrupt controller */ + mpc52xx_init_irq(); + + /* Now find the FPGA IRQ */ + fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga"); + if (!fpga_np) + goto out; + pr_debug("%s: found fpga node: %pOF\n", __func__, fpga_np); + + media5200_irq.regs = of_iomap(fpga_np, 0); + if (!media5200_irq.regs) + goto out; + pr_debug("%s: mapped to %p\n", __func__, media5200_irq.regs); + + cascade_virq = irq_of_parse_and_map(fpga_np, 0); + if (!cascade_virq) + goto out; + pr_debug("%s: cascaded on virq=%i\n", __func__, cascade_virq); + + /* Disable all FPGA IRQs */ + out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, 0); + + spin_lock_init(&media5200_irq.lock); + + media5200_irq.irqhost = irq_domain_add_linear(fpga_np, + MEDIA5200_NUM_IRQS, &media5200_irq_ops, &media5200_irq); + if (!media5200_irq.irqhost) + goto out; + pr_debug("%s: allocated irqhost\n", __func__); + + of_node_put(fpga_np); + + irq_set_handler_data(cascade_virq, &media5200_irq); + irq_set_chained_handler(cascade_virq, media5200_irq_cascade); + + return; + + out: + pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n"); + of_node_put(fpga_np); +} + +/* + * Setup the architecture + */ +static void __init media5200_setup_arch(void) +{ + + struct device_node *np; + struct mpc52xx_gpio __iomem *gpio; + u32 port_config; + + if (ppc_md.progress) + ppc_md.progress("media5200_setup_arch()", 0); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + + /* Some mpc5200 & mpc5200b related configuration */ + mpc5200_setup_xlb_arbiter(); + + np = of_find_matching_node(NULL, mpc5200_gpio_ids); + gpio = of_iomap(np, 0); + of_node_put(np); + if (!gpio) { + printk(KERN_ERR "%s() failed. expect abnormal behavior\n", + __func__); + return; + } + + /* Set port config */ + port_config = in_be32(&gpio->port_config); + + port_config &= ~0x03000000; /* ATA CS is on csb_4/5 */ + port_config |= 0x01000000; + + out_be32(&gpio->port_config, port_config); + + /* Unmap zone */ + iounmap(gpio); + +} + +define_machine(media5200_platform) { + .name = "media5200-platform", + .compatible = "fsl,media5200", + .setup_arch = media5200_setup_arch, + .discover_phbs = mpc52xx_setup_pci, + .init = mpc52xx_declare_of_platform_devices, + .init_IRQ = media5200_init_irq, + .get_irq = mpc52xx_get_irq, + .restart = mpc52xx_restart, +}; diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c new file mode 100644 index 0000000000..f1e85e86f5 --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support for 'mpc5200-simple-platform' compatible boards. + * + * Written by Marian Balakowicz + * Copyright (C) 2007 Semihalf + * + * Description: + * This code implements support for a simple MPC52xx based boards which + * do not need a custom platform specific setup. Such boards are + * supported assuming the following: + * + * - GPIO pins are configured by the firmware, + * - CDM configuration (clocking) is setup correctly by firmware, + * - if the 'fsl,has-wdt' property is present in one of the + * gpt nodes, then it is safe to use such gpt to reset the board, + * - PCI is supported if enabled in the kernel configuration + * and if there is a PCI bus node defined in the device tree. + * + * Boards that are compatible with this generic platform support + * are listed in a 'board' table. + */ + +#undef DEBUG +#include +#include +#include +#include + +/* + * Setup the architecture + */ +static void __init mpc5200_simple_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc5200_simple_setup_arch()", 0); + + /* Map important registers from the internal memory map */ + mpc52xx_map_common_devices(); + + /* Some mpc5200 & mpc5200b related configuration */ + mpc5200_setup_xlb_arbiter(); +} + +/* list of the supported boards */ +static const char *board[] __initdata = { + "anonymous,a3m071", + "anonymous,a4m072", + "anon,charon", + "ifm,o2d", + "intercontrol,digsy-mtc", + "manroland,mucmc52", + "manroland,uc101", + "phytec,pcm030", + "phytec,pcm032", + "promess,motionpro", + "schindler,cm5200", + "tqc,tqm5200", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc5200_simple_probe(void) +{ + return of_device_compatible_match(of_root, board); +} + +define_machine(mpc5200_simple_platform) { + .name = "mpc5200-simple-platform", + .probe = mpc5200_simple_probe, + .setup_arch = mpc5200_simple_setup_arch, + .discover_phbs = mpc52xx_setup_pci, + .init = mpc52xx_declare_of_platform_devices, + .init_IRQ = mpc52xx_init_irq, + .get_irq = mpc52xx_get_irq, + .restart = mpc52xx_restart, +}; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c new file mode 100644 index 0000000000..b4938e344f --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -0,0 +1,306 @@ +/* + * + * Utility functions for the Freescale MPC52xx. + * + * Copyright (C) 2006 Sylvain Munaut + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* MPC5200 device tree match tables */ +static const struct of_device_id mpc52xx_xlb_ids[] __initconst = { + { .compatible = "fsl,mpc5200-xlb", }, + { .compatible = "mpc5200-xlb", }, + {} +}; +static const struct of_device_id mpc52xx_bus_ids[] __initconst = { + { .compatible = "fsl,mpc5200-immr", }, + { .compatible = "fsl,mpc5200b-immr", }, + { .compatible = "simple-bus", }, + + /* depreciated matches; shouldn't be used in new device trees */ + { .compatible = "fsl,lpb", }, + { .type = "builtin", .compatible = "mpc5200", }, /* efika */ + { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */ + {} +}; + +/* + * This variable is mapped in mpc52xx_map_wdt() and used in mpc52xx_restart(). + * Permanent mapping is required because mpc52xx_restart() can be called + * from interrupt context while node mapping (which calls ioremap()) + * cannot be used at such point. + */ +static DEFINE_SPINLOCK(mpc52xx_lock); +static struct mpc52xx_gpt __iomem *mpc52xx_wdt; +static struct mpc52xx_cdm __iomem *mpc52xx_cdm; + +/* + * Configure the XLB arbiter settings to match what Linux expects. + */ +void __init +mpc5200_setup_xlb_arbiter(void) +{ + struct device_node *np; + struct mpc52xx_xlb __iomem *xlb; + + np = of_find_matching_node(NULL, mpc52xx_xlb_ids); + xlb = of_iomap(np, 0); + of_node_put(np); + if (!xlb) { + printk(KERN_ERR __FILE__ ": " + "Error mapping XLB in mpc52xx_setup_cpu(). " + "Expect some abnormal behavior\n"); + return; + } + + /* Configure the XLB Arbiter priorities */ + out_be32(&xlb->master_pri_enable, 0xff); + out_be32(&xlb->master_priority, 0x11111111); + + /* + * Disable XLB pipelining + * (cfr errate 292. We could do this only just before ATA PIO + * transaction and re-enable it afterwards ...) + * Not needed on MPC5200B. + */ + if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR) + out_be32(&xlb->config, in_be32(&xlb->config) | MPC52xx_XLB_CFG_PLDIS); + + iounmap(xlb); +} + +/* + * This variable is mapped in mpc52xx_map_common_devices and + * used in mpc5200_psc_ac97_gpio_reset(). + */ +static DEFINE_SPINLOCK(gpio_lock); +struct mpc52xx_gpio __iomem *simple_gpio; +struct mpc52xx_gpio_wkup __iomem *wkup_gpio; + +/** + * mpc52xx_declare_of_platform_devices: register internal devices and children + * of the localplus bus to the of_platform + * bus. + */ +void __init mpc52xx_declare_of_platform_devices(void) +{ + /* Find all the 'platform' devices and register them. */ + if (of_platform_populate(NULL, mpc52xx_bus_ids, NULL, NULL)) + pr_err(__FILE__ ": Error while populating devices from DT\n"); +} + +/* + * match tables used by mpc52xx_map_common_devices() + */ +static const struct of_device_id mpc52xx_gpt_ids[] __initconst = { + { .compatible = "fsl,mpc5200-gpt", }, + { .compatible = "mpc5200-gpt", }, /* old */ + {} +}; +static const struct of_device_id mpc52xx_cdm_ids[] __initconst = { + { .compatible = "fsl,mpc5200-cdm", }, + { .compatible = "mpc5200-cdm", }, /* old */ + {} +}; +static const struct of_device_id mpc52xx_gpio_simple[] __initconst = { + { .compatible = "fsl,mpc5200-gpio", }, + {} +}; +static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = { + { .compatible = "fsl,mpc5200-gpio-wkup", }, + {} +}; + + +/** + * mpc52xx_map_common_devices: iomap devices required by common code + */ +void __init +mpc52xx_map_common_devices(void) +{ + struct device_node *np; + + /* mpc52xx_wdt is mapped here and used in mpc52xx_restart, + * possibly from a interrupt context. wdt is only implement + * on a gpt0, so check has-wdt property before mapping. + */ + for_each_matching_node(np, mpc52xx_gpt_ids) { + if (of_property_read_bool(np, "fsl,has-wdt") || + of_property_read_bool(np, "has-wdt")) { + mpc52xx_wdt = of_iomap(np, 0); + of_node_put(np); + break; + } + } + + /* Clock Distribution Module, used by PSC clock setting function */ + np = of_find_matching_node(NULL, mpc52xx_cdm_ids); + mpc52xx_cdm = of_iomap(np, 0); + of_node_put(np); + + /* simple_gpio registers */ + np = of_find_matching_node(NULL, mpc52xx_gpio_simple); + simple_gpio = of_iomap(np, 0); + of_node_put(np); + + /* wkup_gpio registers */ + np = of_find_matching_node(NULL, mpc52xx_gpio_wkup); + wkup_gpio = of_iomap(np, 0); + of_node_put(np); +} + +/** + * mpc52xx_set_psc_clkdiv: Set clock divider in the CDM for PSC ports + * + * @psc_id: id of psc port; must be 1,2,3 or 6 + * @clkdiv: clock divider value to put into CDM PSC register. + */ +int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv) +{ + unsigned long flags; + u16 __iomem *reg; + u32 val; + u32 mask; + u32 mclken_div; + + if (!mpc52xx_cdm) + return -ENODEV; + + mclken_div = 0x8000 | (clkdiv & 0x1FF); + switch (psc_id) { + case 1: reg = &mpc52xx_cdm->mclken_div_psc1; mask = 0x20; break; + case 2: reg = &mpc52xx_cdm->mclken_div_psc2; mask = 0x40; break; + case 3: reg = &mpc52xx_cdm->mclken_div_psc3; mask = 0x80; break; + case 6: reg = &mpc52xx_cdm->mclken_div_psc6; mask = 0x10; break; + default: + return -ENODEV; + } + + /* Set the rate and enable the clock */ + spin_lock_irqsave(&mpc52xx_lock, flags); + out_be16(reg, mclken_div); + val = in_be32(&mpc52xx_cdm->clk_enables); + out_be32(&mpc52xx_cdm->clk_enables, val | mask); + spin_unlock_irqrestore(&mpc52xx_lock, flags); + + return 0; +} +EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv); + +/** + * mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer + */ +void __noreturn mpc52xx_restart(char *cmd) +{ + local_irq_disable(); + + /* Turn on the watchdog and wait for it to expire. + * It effectively does a reset. */ + if (mpc52xx_wdt) { + out_be32(&mpc52xx_wdt->mode, 0x00000000); + out_be32(&mpc52xx_wdt->count, 0x000000ff); + out_be32(&mpc52xx_wdt->mode, 0x00009004); + } else + printk(KERN_ERR __FILE__ ": " + "mpc52xx_restart: Can't access wdt. " + "Restart impossible, system halted.\n"); + + while (1); +} + +#define PSC1_RESET 0x1 +#define PSC1_SYNC 0x4 +#define PSC1_SDATA_OUT 0x1 +#define PSC2_RESET 0x2 +#define PSC2_SYNC (0x4<<4) +#define PSC2_SDATA_OUT (0x1<<4) +#define MPC52xx_GPIO_PSC1_MASK 0x7 +#define MPC52xx_GPIO_PSC2_MASK (0x7<<4) + +/** + * mpc5200_psc_ac97_gpio_reset: Use gpio pins to reset the ac97 bus + * + * @psc: psc number to reset (only psc 1 and 2 support ac97) + */ +int mpc5200_psc_ac97_gpio_reset(int psc_number) +{ + unsigned long flags; + u32 gpio; + u32 mux; + int out; + int reset; + int sync; + + if ((!simple_gpio) || (!wkup_gpio)) + return -ENODEV; + + switch (psc_number) { + case 0: + reset = PSC1_RESET; /* AC97_1_RES */ + sync = PSC1_SYNC; /* AC97_1_SYNC */ + out = PSC1_SDATA_OUT; /* AC97_1_SDATA_OUT */ + gpio = MPC52xx_GPIO_PSC1_MASK; + break; + case 1: + reset = PSC2_RESET; /* AC97_2_RES */ + sync = PSC2_SYNC; /* AC97_2_SYNC */ + out = PSC2_SDATA_OUT; /* AC97_2_SDATA_OUT */ + gpio = MPC52xx_GPIO_PSC2_MASK; + break; + default: + pr_err(__FILE__ ": Unable to determine PSC, no ac97 " + "cold-reset will be performed\n"); + return -ENODEV; + } + + spin_lock_irqsave(&gpio_lock, flags); + + /* Reconfigure pin-muxing to gpio */ + mux = in_be32(&simple_gpio->port_config); + out_be32(&simple_gpio->port_config, mux & (~gpio)); + + /* enable gpio pins for output */ + setbits8(&wkup_gpio->wkup_gpioe, reset); + setbits32(&simple_gpio->simple_gpioe, sync | out); + + setbits8(&wkup_gpio->wkup_ddr, reset); + setbits32(&simple_gpio->simple_ddr, sync | out); + + /* Assert cold reset */ + clrbits32(&simple_gpio->simple_dvo, sync | out); + clrbits8(&wkup_gpio->wkup_dvo, reset); + + /* wait for 1 us */ + udelay(1); + + /* Deassert reset */ + setbits8(&wkup_gpio->wkup_dvo, reset); + + /* wait at least 200ns */ + /* 7 ~= (200ns * timebase) / ns2sec */ + __delay(7); + + /* Restore pin-muxing */ + out_be32(&simple_gpio->port_config, mux); + + spin_unlock_irqrestore(&gpio_lock, flags); + + return 0; +} +EXPORT_SYMBOL(mpc5200_psc_ac97_gpio_reset); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c new file mode 100644 index 0000000000..581059527c --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC5200 General Purpose Timer device driver + * + * Copyright (c) 2009 Secret Lab Technologies Ltd. + * Copyright (c) 2008 Sascha Hauer , Pengutronix + * + * This file is a driver for the General Purpose Timer (gpt) devices + * found on the MPC5200 SoC. Each timer has an IO pin which can be used + * for GPIO or can be used to raise interrupts. The timer function can + * be used independently from the IO pin, or it can be used to control + * output signals or measure input signals. + * + * This driver supports the GPIO and IRQ controller functions of the GPT + * device. Timer functions are not yet supported. + * + * The timer gpt0 can be used as watchdog (wdt). If the wdt mode is used, + * this prevents the use of any gpt0 gpt function (i.e. they will fail with + * -EBUSY). Thus, the safety wdt function always has precedence over the gpt + * function. If the kernel has been compiled with CONFIG_WATCHDOG_NOWAYOUT, + * this means that gpt0 is locked in wdt mode until the next reboot - this + * may be a requirement in safety applications. + * + * To use the GPIO function, the following two properties must be added + * to the device tree node for the gpt device (typically in the .dts file + * for the board): + * gpio-controller; + * #gpio-cells = < 2 >; + * This driver will register the GPIO pin if it finds the gpio-controller + * property in the device tree. + * + * To use the IRQ controller function, the following two properties must + * be added to the device tree node for the gpt device: + * interrupt-controller; + * #interrupt-cells = < 1 >; + * The IRQ controller binding only uses one cell to specify the interrupt, + * and the IRQ flags are encoded in the cell. A cell is not used to encode + * the IRQ number because the GPT only has a single IRQ source. For flags, + * a value of '1' means rising edge sensitive and '2' means falling edge. + * + * The GPIO and the IRQ controller functions can be used at the same time, + * but in this use case the IO line will only work as an input. Trying to + * use it as a GPIO output will not work. + * + * When using the GPIO line as an output, it can either be driven as normal + * IO, or it can be an Open Collector (OC) output. At the moment it is the + * responsibility of either the bootloader or the platform setup code to set + * the output mode. This driver does not change the output mode setting. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Freescale MPC52xx gpt driver"); +MODULE_AUTHOR("Sascha Hauer, Grant Likely, Albrecht Dreß"); +MODULE_LICENSE("GPL"); + +/** + * struct mpc52xx_gpt - Private data structure for MPC52xx GPT driver + * @dev: pointer to device structure + * @regs: virtual address of GPT registers + * @lock: spinlock to coordinate between different functions. + * @gc: gpio_chip instance structure; used when GPIO is enabled + * @irqhost: Pointer to irq_domain instance; used when IRQ mode is supported + * @wdt_mode: only relevant for gpt0: bit 0 (MPC52xx_GPT_CAN_WDT) indicates + * if the gpt may be used as wdt, bit 1 (MPC52xx_GPT_IS_WDT) indicates + * if the timer is actively used as wdt which blocks gpt functions + */ +struct mpc52xx_gpt_priv { + struct list_head list; /* List of all GPT devices */ + struct device *dev; + struct mpc52xx_gpt __iomem *regs; + raw_spinlock_t lock; + struct irq_domain *irqhost; + u32 ipb_freq; + u8 wdt_mode; + +#if defined(CONFIG_GPIOLIB) + struct gpio_chip gc; +#endif +}; + +LIST_HEAD(mpc52xx_gpt_list); +DEFINE_MUTEX(mpc52xx_gpt_list_mutex); + +#define MPC52xx_GPT_MODE_MS_MASK (0x07) +#define MPC52xx_GPT_MODE_MS_IC (0x01) +#define MPC52xx_GPT_MODE_MS_OC (0x02) +#define MPC52xx_GPT_MODE_MS_PWM (0x03) +#define MPC52xx_GPT_MODE_MS_GPIO (0x04) + +#define MPC52xx_GPT_MODE_GPIO_MASK (0x30) +#define MPC52xx_GPT_MODE_GPIO_OUT_LOW (0x20) +#define MPC52xx_GPT_MODE_GPIO_OUT_HIGH (0x30) + +#define MPC52xx_GPT_MODE_COUNTER_ENABLE (0x1000) +#define MPC52xx_GPT_MODE_CONTINUOUS (0x0400) +#define MPC52xx_GPT_MODE_OPEN_DRAIN (0x0200) +#define MPC52xx_GPT_MODE_IRQ_EN (0x0100) +#define MPC52xx_GPT_MODE_WDT_EN (0x8000) + +#define MPC52xx_GPT_MODE_ICT_MASK (0x030000) +#define MPC52xx_GPT_MODE_ICT_RISING (0x010000) +#define MPC52xx_GPT_MODE_ICT_FALLING (0x020000) +#define MPC52xx_GPT_MODE_ICT_TOGGLE (0x030000) + +#define MPC52xx_GPT_MODE_WDT_PING (0xa5) + +#define MPC52xx_GPT_STATUS_IRQMASK (0x000f) + +#define MPC52xx_GPT_CAN_WDT (1 << 0) +#define MPC52xx_GPT_IS_WDT (1 << 1) + + +/* --------------------------------------------------------------------- + * Cascaded interrupt controller hooks + */ + +static void mpc52xx_gpt_irq_unmask(struct irq_data *d) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + unsigned long flags; + + raw_spin_lock_irqsave(&gpt->lock, flags); + setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); + raw_spin_unlock_irqrestore(&gpt->lock, flags); +} + +static void mpc52xx_gpt_irq_mask(struct irq_data *d) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + unsigned long flags; + + raw_spin_lock_irqsave(&gpt->lock, flags); + clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN); + raw_spin_unlock_irqrestore(&gpt->lock, flags); +} + +static void mpc52xx_gpt_irq_ack(struct irq_data *d) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + + out_be32(&gpt->regs->status, MPC52xx_GPT_STATUS_IRQMASK); +} + +static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type) +{ + struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d); + unsigned long flags; + u32 reg; + + dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type); + + raw_spin_lock_irqsave(&gpt->lock, flags); + reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK; + if (flow_type & IRQF_TRIGGER_RISING) + reg |= MPC52xx_GPT_MODE_ICT_RISING; + if (flow_type & IRQF_TRIGGER_FALLING) + reg |= MPC52xx_GPT_MODE_ICT_FALLING; + out_be32(&gpt->regs->mode, reg); + raw_spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; +} + +static struct irq_chip mpc52xx_gpt_irq_chip = { + .name = "MPC52xx GPT", + .irq_unmask = mpc52xx_gpt_irq_unmask, + .irq_mask = mpc52xx_gpt_irq_mask, + .irq_ack = mpc52xx_gpt_irq_ack, + .irq_set_type = mpc52xx_gpt_irq_set_type, +}; + +static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) +{ + struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); + u32 status; + + status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK; + if (status) + generic_handle_domain_irq(gpt->irqhost, 0); +} + +static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + struct mpc52xx_gpt_priv *gpt = h->host_data; + + dev_dbg(gpt->dev, "%s: h=%p, virq=%i\n", __func__, h, virq); + irq_set_chip_data(virq, gpt); + irq_set_chip_and_handler(virq, &mpc52xx_gpt_irq_chip, handle_edge_irq); + + return 0; +} + +static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + struct mpc52xx_gpt_priv *gpt = h->host_data; + + dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]); + + if ((intsize < 1) || (intspec[0] > 3)) { + dev_err(gpt->dev, "bad irq specifier in %pOF\n", ct); + return -EINVAL; + } + + *out_hwirq = 0; /* The GPT only has 1 IRQ line */ + *out_flags = intspec[0]; + + return 0; +} + +static const struct irq_domain_ops mpc52xx_gpt_irq_ops = { + .map = mpc52xx_gpt_irq_map, + .xlate = mpc52xx_gpt_irq_xlate, +}; + +static void +mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node) +{ + int cascade_virq; + unsigned long flags; + u32 mode; + + cascade_virq = irq_of_parse_and_map(node, 0); + if (!cascade_virq) + return; + + gpt->irqhost = irq_domain_add_linear(node, 1, &mpc52xx_gpt_irq_ops, gpt); + if (!gpt->irqhost) { + dev_err(gpt->dev, "irq_domain_add_linear() failed\n"); + return; + } + + irq_set_handler_data(cascade_virq, gpt); + irq_set_chained_handler(cascade_virq, mpc52xx_gpt_irq_cascade); + + /* If the GPT is currently disabled, then change it to be in Input + * Capture mode. If the mode is non-zero, then the pin could be + * already in use for something. */ + raw_spin_lock_irqsave(&gpt->lock, flags); + mode = in_be32(&gpt->regs->mode); + if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0) + out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC); + raw_spin_unlock_irqrestore(&gpt->lock, flags); + + dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq); +} + + +/* --------------------------------------------------------------------- + * GPIOLIB hooks + */ +#if defined(CONFIG_GPIOLIB) +static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc); + + return (in_be32(&gpt->regs->status) >> 8) & 1; +} + +static void +mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v) +{ + struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc); + unsigned long flags; + u32 r; + + dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v); + r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW; + + raw_spin_lock_irqsave(&gpt->lock, flags); + clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r); + raw_spin_unlock_irqrestore(&gpt->lock, flags); +} + +static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc); + unsigned long flags; + + dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio); + + raw_spin_lock_irqsave(&gpt->lock, flags); + clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK); + raw_spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; +} + +static int +mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + mpc52xx_gpt_gpio_set(gc, gpio, val); + return 0; +} + +static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) +{ + int rc; + + /* Only setup GPIO if the device claims the GPT is a GPIO controller */ + if (!device_property_present(gpt->dev, "gpio-controller")) + return; + + gpt->gc.label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(gpt->dev)); + if (!gpt->gc.label) { + dev_err(gpt->dev, "out of memory\n"); + return; + } + + gpt->gc.ngpio = 1; + gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in; + gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out; + gpt->gc.get = mpc52xx_gpt_gpio_get; + gpt->gc.set = mpc52xx_gpt_gpio_set; + gpt->gc.base = -1; + gpt->gc.parent = gpt->dev; + + /* Setup external pin in GPIO mode */ + clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK, + MPC52xx_GPT_MODE_MS_GPIO); + + rc = gpiochip_add_data(&gpt->gc, gpt); + if (rc) + dev_err(gpt->dev, "gpiochip_add_data() failed; rc=%i\n", rc); + + dev_dbg(gpt->dev, "%s() complete.\n", __func__); +} +#else /* defined(CONFIG_GPIOLIB) */ +static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { } +#endif /* defined(CONFIG_GPIOLIB) */ + +/*********************************************************************** + * Timer API + */ + +/** + * mpc52xx_gpt_from_irq - Return the GPT device associated with an IRQ number + * @irq: irq of timer. + */ +struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq) +{ + struct mpc52xx_gpt_priv *gpt; + struct list_head *pos; + + /* Iterate over the list of timers looking for a matching device */ + mutex_lock(&mpc52xx_gpt_list_mutex); + list_for_each(pos, &mpc52xx_gpt_list) { + gpt = container_of(pos, struct mpc52xx_gpt_priv, list); + if (gpt->irqhost && irq == irq_linear_revmap(gpt->irqhost, 0)) { + mutex_unlock(&mpc52xx_gpt_list_mutex); + return gpt; + } + } + mutex_unlock(&mpc52xx_gpt_list_mutex); + + return NULL; +} +EXPORT_SYMBOL(mpc52xx_gpt_from_irq); + +static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period, + int continuous, int as_wdt) +{ + u32 clear, set; + u64 clocks; + u32 prescale; + unsigned long flags; + + clear = MPC52xx_GPT_MODE_MS_MASK | MPC52xx_GPT_MODE_CONTINUOUS; + set = MPC52xx_GPT_MODE_MS_GPIO | MPC52xx_GPT_MODE_COUNTER_ENABLE; + if (as_wdt) { + clear |= MPC52xx_GPT_MODE_IRQ_EN; + set |= MPC52xx_GPT_MODE_WDT_EN; + } else if (continuous) + set |= MPC52xx_GPT_MODE_CONTINUOUS; + + /* Determine the number of clocks in the requested period. 64 bit + * arithmetic is done here to preserve the precision until the value + * is scaled back down into the u32 range. Period is in 'ns', bus + * frequency is in Hz. */ + clocks = period * (u64)gpt->ipb_freq; + do_div(clocks, 1000000000); /* Scale it down to ns range */ + + /* This device cannot handle a clock count greater than 32 bits */ + if (clocks > 0xffffffff) + return -EINVAL; + + /* Calculate the prescaler and count values from the clocks value. + * 'clocks' is the number of clock ticks in the period. The timer + * has 16 bit precision and a 16 bit prescaler. Prescaler is + * calculated by integer dividing the clocks by 0x10000 (shifting + * down 16 bits) to obtain the smallest possible divisor for clocks + * to get a 16 bit count value. + * + * Note: the prescale register is '1' based, not '0' based. ie. a + * value of '1' means divide the clock by one. 0xffff divides the + * clock by 0xffff. '0x0000' does not divide by zero, but wraps + * around and divides by 0x10000. That is why prescale must be + * a u32 variable, not a u16, for this calculation. */ + prescale = (clocks >> 16) + 1; + do_div(clocks, prescale); + if (clocks > 0xffff) { + pr_err("calculation error; prescale:%x clocks:%llx\n", + prescale, clocks); + return -EINVAL; + } + + /* Set and enable the timer, reject an attempt to use a wdt as gpt */ + raw_spin_lock_irqsave(&gpt->lock, flags); + if (as_wdt) + gpt->wdt_mode |= MPC52xx_GPT_IS_WDT; + else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { + raw_spin_unlock_irqrestore(&gpt->lock, flags); + return -EBUSY; + } + out_be32(&gpt->regs->count, prescale << 16 | clocks); + clrsetbits_be32(&gpt->regs->mode, clear, set); + raw_spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; +} + +/** + * mpc52xx_gpt_start_timer - Set and enable the GPT timer + * @gpt: Pointer to gpt private data structure + * @period: period of timer in ns; max. ~130s @ 33MHz IPB clock + * @continuous: set to 1 to make timer continuous free running + * + * An interrupt will be generated every time the timer fires + */ +int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period, + int continuous) +{ + return mpc52xx_gpt_do_start(gpt, period, continuous, 0); +} +EXPORT_SYMBOL(mpc52xx_gpt_start_timer); + +/** + * mpc52xx_gpt_stop_timer - Stop a gpt + * @gpt: Pointer to gpt private data structure + * + * Returns an error if attempting to stop a wdt + */ +int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt) +{ + unsigned long flags; + + /* reject the operation if the timer is used as watchdog (gpt 0 only) */ + raw_spin_lock_irqsave(&gpt->lock, flags); + if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) { + raw_spin_unlock_irqrestore(&gpt->lock, flags); + return -EBUSY; + } + + clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE); + raw_spin_unlock_irqrestore(&gpt->lock, flags); + return 0; +} +EXPORT_SYMBOL(mpc52xx_gpt_stop_timer); + +/** + * mpc52xx_gpt_timer_period - Read the timer period + * @gpt: Pointer to gpt private data structure + * + * Returns the timer period in ns + */ +u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt) +{ + u64 period; + u64 prescale; + unsigned long flags; + + raw_spin_lock_irqsave(&gpt->lock, flags); + period = in_be32(&gpt->regs->count); + raw_spin_unlock_irqrestore(&gpt->lock, flags); + + prescale = period >> 16; + period &= 0xffff; + if (prescale == 0) + prescale = 0x10000; + period = period * prescale * 1000000000ULL; + do_div(period, gpt->ipb_freq); + return period; +} +EXPORT_SYMBOL(mpc52xx_gpt_timer_period); + +#if defined(CONFIG_MPC5200_WDT) +/*********************************************************************** + * Watchdog API for gpt0 + */ + +#define WDT_IDENTITY "mpc52xx watchdog on GPT0" + +/* wdt_is_active stores whether or not the /dev/watchdog device is opened */ +static unsigned long wdt_is_active; + +/* wdt-capable gpt */ +static struct mpc52xx_gpt_priv *mpc52xx_gpt_wdt; + +/* low-level wdt functions */ +static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&gpt_wdt->lock, flags); + out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING); + raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags); +} + +/* wdt misc device api */ +static ssize_t mpc52xx_wdt_write(struct file *file, const char __user *data, + size_t len, loff_t *ppos) +{ + struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; + mpc52xx_gpt_wdt_ping(gpt_wdt); + return 0; +} + +static const struct watchdog_info mpc5200_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = WDT_IDENTITY, +}; + +static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; + int __user *data = (int __user *)arg; + int timeout; + u64 real_timeout; + int ret = 0; + + switch (cmd) { + case WDIOC_GETSUPPORT: + ret = copy_to_user(data, &mpc5200_wdt_info, + sizeof(mpc5200_wdt_info)); + if (ret) + ret = -EFAULT; + break; + + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + ret = put_user(0, data); + break; + + case WDIOC_KEEPALIVE: + mpc52xx_gpt_wdt_ping(gpt_wdt); + break; + + case WDIOC_SETTIMEOUT: + ret = get_user(timeout, data); + if (ret) + break; + real_timeout = (u64) timeout * 1000000000ULL; + ret = mpc52xx_gpt_do_start(gpt_wdt, real_timeout, 0, 1); + if (ret) + break; + /* fall through and return the timeout */ + fallthrough; + + case WDIOC_GETTIMEOUT: + /* we need to round here as to avoid e.g. the following + * situation: + * - timeout requested is 1 second; + * - real timeout @33MHz is 999997090ns + * - the int divide by 10^9 will return 0. + */ + real_timeout = + mpc52xx_gpt_timer_period(gpt_wdt) + 500000000ULL; + do_div(real_timeout, 1000000000ULL); + timeout = (int) real_timeout; + ret = put_user(timeout, data); + break; + + default: + ret = -ENOTTY; + } + return ret; +} + +static int mpc52xx_wdt_open(struct inode *inode, struct file *file) +{ + int ret; + + /* sanity check */ + if (!mpc52xx_gpt_wdt) + return -ENODEV; + + /* /dev/watchdog can only be opened once */ + if (test_and_set_bit(0, &wdt_is_active)) + return -EBUSY; + + /* Set and activate the watchdog with 30 seconds timeout */ + ret = mpc52xx_gpt_do_start(mpc52xx_gpt_wdt, 30ULL * 1000000000ULL, + 0, 1); + if (ret) { + clear_bit(0, &wdt_is_active); + return ret; + } + + file->private_data = mpc52xx_gpt_wdt; + return stream_open(inode, file); +} + +static int mpc52xx_wdt_release(struct inode *inode, struct file *file) +{ + /* note: releasing the wdt in NOWAYOUT-mode does not stop it */ +#if !defined(CONFIG_WATCHDOG_NOWAYOUT) + struct mpc52xx_gpt_priv *gpt_wdt = file->private_data; + unsigned long flags; + + raw_spin_lock_irqsave(&gpt_wdt->lock, flags); + clrbits32(&gpt_wdt->regs->mode, + MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN); + gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT; + raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags); +#endif + clear_bit(0, &wdt_is_active); + return 0; +} + + +static const struct file_operations mpc52xx_wdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .write = mpc52xx_wdt_write, + .unlocked_ioctl = mpc52xx_wdt_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .open = mpc52xx_wdt_open, + .release = mpc52xx_wdt_release, +}; + +static struct miscdevice mpc52xx_wdt_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &mpc52xx_wdt_fops, +}; + +static int mpc52xx_gpt_wdt_init(void) +{ + int err; + + /* try to register the watchdog misc device */ + err = misc_register(&mpc52xx_wdt_miscdev); + if (err) + pr_err("%s: cannot register watchdog device\n", WDT_IDENTITY); + else + pr_info("%s: watchdog device registered\n", WDT_IDENTITY); + return err; +} + +static int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt, + const u32 *period) +{ + u64 real_timeout; + + /* remember the gpt for the wdt operation */ + mpc52xx_gpt_wdt = gpt; + + /* configure the wdt if the device tree contained a timeout */ + if (!period || *period == 0) + return 0; + + real_timeout = (u64) *period * 1000000000ULL; + if (mpc52xx_gpt_do_start(gpt, real_timeout, 0, 1)) + dev_warn(gpt->dev, "starting as wdt failed\n"); + else + dev_info(gpt->dev, "watchdog set to %us timeout\n", *period); + return 0; +} + +#else + +static int mpc52xx_gpt_wdt_init(void) +{ + return 0; +} + +static inline int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt, + const u32 *period) +{ + return 0; +} + +#endif /* CONFIG_MPC5200_WDT */ + +/* --------------------------------------------------------------------- + * of_platform bus binding code + */ +static int mpc52xx_gpt_probe(struct platform_device *ofdev) +{ + struct mpc52xx_gpt_priv *gpt; + + gpt = devm_kzalloc(&ofdev->dev, sizeof *gpt, GFP_KERNEL); + if (!gpt) + return -ENOMEM; + + raw_spin_lock_init(&gpt->lock); + gpt->dev = &ofdev->dev; + gpt->ipb_freq = mpc5xxx_get_bus_frequency(&ofdev->dev); + gpt->regs = of_iomap(ofdev->dev.of_node, 0); + if (!gpt->regs) + return -ENOMEM; + + dev_set_drvdata(&ofdev->dev, gpt); + + mpc52xx_gpt_gpio_setup(gpt); + mpc52xx_gpt_irq_setup(gpt, ofdev->dev.of_node); + + mutex_lock(&mpc52xx_gpt_list_mutex); + list_add(&gpt->list, &mpc52xx_gpt_list); + mutex_unlock(&mpc52xx_gpt_list_mutex); + + /* check if this device could be a watchdog */ + if (of_property_read_bool(ofdev->dev.of_node, "fsl,has-wdt") || + of_property_read_bool(ofdev->dev.of_node, "has-wdt")) { + const u32 *on_boot_wdt; + + gpt->wdt_mode = MPC52xx_GPT_CAN_WDT; + on_boot_wdt = of_get_property(ofdev->dev.of_node, + "fsl,wdt-on-boot", NULL); + if (on_boot_wdt) { + dev_info(gpt->dev, "used as watchdog\n"); + gpt->wdt_mode |= MPC52xx_GPT_IS_WDT; + } else + dev_info(gpt->dev, "can function as watchdog\n"); + mpc52xx_gpt_wdt_setup(gpt, on_boot_wdt); + } + + return 0; +} + +static const struct of_device_id mpc52xx_gpt_match[] = { + { .compatible = "fsl,mpc5200-gpt", }, + + /* Depreciated compatible values; don't use for new dts files */ + { .compatible = "fsl,mpc5200-gpt-gpio", }, + { .compatible = "mpc5200-gpt", }, + {} +}; + +static struct platform_driver mpc52xx_gpt_driver = { + .driver = { + .name = "mpc52xx-gpt", + .suppress_bind_attrs = true, + .of_match_table = mpc52xx_gpt_match, + }, + .probe = mpc52xx_gpt_probe, +}; + +static int __init mpc52xx_gpt_init(void) +{ + return platform_driver_register(&mpc52xx_gpt_driver); +} + +/* Make sure GPIOs and IRQs get set up before anyone tries to use them */ +subsys_initcall(mpc52xx_gpt_init); +device_initcall(mpc52xx_gpt_wdt_init); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c new file mode 100644 index 0000000000..0ca4401ba7 --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c @@ -0,0 +1,419 @@ +/* + * PCI code for the Freescale MPC52xx embedded CPU. + * + * Copyright (C) 2006 Secret Lab Technologies Ltd. + * Grant Likely + * Copyright (C) 2004 Sylvain Munaut + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include + + +/* ======================================================================== */ +/* Structures mapping & Defines for PCI Unit */ +/* ======================================================================== */ + +#define MPC52xx_PCI_GSCR_BM 0x40000000 +#define MPC52xx_PCI_GSCR_PE 0x20000000 +#define MPC52xx_PCI_GSCR_SE 0x10000000 +#define MPC52xx_PCI_GSCR_XLB2PCI_MASK 0x07000000 +#define MPC52xx_PCI_GSCR_XLB2PCI_SHIFT 24 +#define MPC52xx_PCI_GSCR_IPG2PCI_MASK 0x00070000 +#define MPC52xx_PCI_GSCR_IPG2PCI_SHIFT 16 +#define MPC52xx_PCI_GSCR_BME 0x00004000 +#define MPC52xx_PCI_GSCR_PEE 0x00002000 +#define MPC52xx_PCI_GSCR_SEE 0x00001000 +#define MPC52xx_PCI_GSCR_PR 0x00000001 + + +#define MPC52xx_PCI_IWBTAR_TRANSLATION(proc_ad,pci_ad,size) \ + ( ( (proc_ad) & 0xff000000 ) | \ + ( (((size) - 1) >> 8) & 0x00ff0000 ) | \ + ( ((pci_ad) >> 16) & 0x0000ff00 ) ) + +#define MPC52xx_PCI_IWCR_PACK(win0,win1,win2) (((win0) << 24) | \ + ((win1) << 16) | \ + ((win2) << 8)) + +#define MPC52xx_PCI_IWCR_DISABLE 0x0 +#define MPC52xx_PCI_IWCR_ENABLE 0x1 +#define MPC52xx_PCI_IWCR_READ 0x0 +#define MPC52xx_PCI_IWCR_READ_LINE 0x2 +#define MPC52xx_PCI_IWCR_READ_MULTI 0x4 +#define MPC52xx_PCI_IWCR_MEM 0x0 +#define MPC52xx_PCI_IWCR_IO 0x8 + +#define MPC52xx_PCI_TCR_P 0x01000000 +#define MPC52xx_PCI_TCR_LD 0x00010000 +#define MPC52xx_PCI_TCR_WCT8 0x00000008 + +#define MPC52xx_PCI_TBATR_DISABLE 0x0 +#define MPC52xx_PCI_TBATR_ENABLE 0x1 + +struct mpc52xx_pci { + u32 idr; /* PCI + 0x00 */ + u32 scr; /* PCI + 0x04 */ + u32 ccrir; /* PCI + 0x08 */ + u32 cr1; /* PCI + 0x0C */ + u32 bar0; /* PCI + 0x10 */ + u32 bar1; /* PCI + 0x14 */ + u8 reserved1[16]; /* PCI + 0x18 */ + u32 ccpr; /* PCI + 0x28 */ + u32 sid; /* PCI + 0x2C */ + u32 erbar; /* PCI + 0x30 */ + u32 cpr; /* PCI + 0x34 */ + u8 reserved2[4]; /* PCI + 0x38 */ + u32 cr2; /* PCI + 0x3C */ + u8 reserved3[32]; /* PCI + 0x40 */ + u32 gscr; /* PCI + 0x60 */ + u32 tbatr0; /* PCI + 0x64 */ + u32 tbatr1; /* PCI + 0x68 */ + u32 tcr; /* PCI + 0x6C */ + u32 iw0btar; /* PCI + 0x70 */ + u32 iw1btar; /* PCI + 0x74 */ + u32 iw2btar; /* PCI + 0x78 */ + u8 reserved4[4]; /* PCI + 0x7C */ + u32 iwcr; /* PCI + 0x80 */ + u32 icr; /* PCI + 0x84 */ + u32 isr; /* PCI + 0x88 */ + u32 arb; /* PCI + 0x8C */ + u8 reserved5[104]; /* PCI + 0x90 */ + u32 car; /* PCI + 0xF8 */ + u8 reserved6[4]; /* PCI + 0xFC */ +}; + +/* MPC5200 device tree match tables */ +const struct of_device_id mpc52xx_pci_ids[] __initconst = { + { .type = "pci", .compatible = "fsl,mpc5200-pci", }, + { .type = "pci", .compatible = "mpc5200-pci", }, + {} +}; + +/* ======================================================================== */ +/* PCI configuration access */ +/* ======================================================================== */ + +static int +mpc52xx_pci_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + u32 value; + + if (ppc_md.pci_exclude_device) + if (ppc_md.pci_exclude_device(hose, bus->number, devfn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + out_be32(hose->cfg_addr, + (1 << 31) | + (bus->number << 16) | + (devfn << 8) | + (offset & 0xfc)); + mb(); + +#if defined(CONFIG_PPC_MPC5200_BUGFIX) + if (bus->number) { + /* workaround for the bug 435 of the MPC5200 (L25R); + * Don't do 32 bits config access during type-1 cycles */ + switch (len) { + case 1: + value = in_8(((u8 __iomem *)hose->cfg_data) + + (offset & 3)); + break; + case 2: + value = in_le16(((u16 __iomem *)hose->cfg_data) + + ((offset>>1) & 1)); + break; + + default: + value = in_le16((u16 __iomem *)hose->cfg_data) | + (in_le16(((u16 __iomem *)hose->cfg_data) + 1) << 16); + break; + } + } + else +#endif + { + value = in_le32(hose->cfg_data); + + if (len != 4) { + value >>= ((offset & 0x3) << 3); + value &= 0xffffffff >> (32 - (len << 3)); + } + } + + *val = value; + + out_be32(hose->cfg_addr, 0); + mb(); + + return PCIBIOS_SUCCESSFUL; +} + +static int +mpc52xx_pci_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + u32 value, mask; + + if (ppc_md.pci_exclude_device) + if (ppc_md.pci_exclude_device(hose, bus->number, devfn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + out_be32(hose->cfg_addr, + (1 << 31) | + (bus->number << 16) | + (devfn << 8) | + (offset & 0xfc)); + mb(); + +#if defined(CONFIG_PPC_MPC5200_BUGFIX) + if (bus->number) { + /* workaround for the bug 435 of the MPC5200 (L25R); + * Don't do 32 bits config access during type-1 cycles */ + switch (len) { + case 1: + out_8(((u8 __iomem *)hose->cfg_data) + + (offset & 3), val); + break; + case 2: + out_le16(((u16 __iomem *)hose->cfg_data) + + ((offset>>1) & 1), val); + break; + + default: + out_le16((u16 __iomem *)hose->cfg_data, + (u16)val); + out_le16(((u16 __iomem *)hose->cfg_data) + 1, + (u16)(val>>16)); + break; + } + } + else +#endif + { + if (len != 4) { + value = in_le32(hose->cfg_data); + + offset = (offset & 0x3) << 3; + mask = (0xffffffff >> (32 - (len << 3))); + mask <<= offset; + + value &= ~mask; + val = value | ((val << offset) & mask); + } + + out_le32(hose->cfg_data, val); + } + mb(); + + out_be32(hose->cfg_addr, 0); + mb(); + + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops mpc52xx_pci_ops = { + .read = mpc52xx_pci_read_config, + .write = mpc52xx_pci_write_config +}; + + +/* ======================================================================== */ +/* PCI setup */ +/* ======================================================================== */ + +static void __init +mpc52xx_pci_setup(struct pci_controller *hose, + struct mpc52xx_pci __iomem *pci_regs, phys_addr_t pci_phys) +{ + struct resource *res; + u32 tmp; + int iwcr0 = 0, iwcr1 = 0, iwcr2 = 0; + + pr_debug("%s(hose=%p, pci_regs=%p)\n", __func__, hose, pci_regs); + + /* pci_process_bridge_OF_ranges() found all our addresses for us; + * now store them in the right places */ + hose->cfg_addr = &pci_regs->car; + hose->cfg_data = hose->io_base_virt; + + /* Control regs */ + tmp = in_be32(&pci_regs->scr); + tmp |= PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY; + out_be32(&pci_regs->scr, tmp); + + /* Memory windows */ + res = &hose->mem_resources[0]; + if (res->flags) { + pr_debug("mem_resource[0] = %pr\n", res); + out_be32(&pci_regs->iw0btar, + MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start, + resource_size(res))); + iwcr0 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM; + if (res->flags & IORESOURCE_PREFETCH) + iwcr0 |= MPC52xx_PCI_IWCR_READ_MULTI; + else + iwcr0 |= MPC52xx_PCI_IWCR_READ; + } + + res = &hose->mem_resources[1]; + if (res->flags) { + pr_debug("mem_resource[1] = %pr\n", res); + out_be32(&pci_regs->iw1btar, + MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start, + resource_size(res))); + iwcr1 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM; + if (res->flags & IORESOURCE_PREFETCH) + iwcr1 |= MPC52xx_PCI_IWCR_READ_MULTI; + else + iwcr1 |= MPC52xx_PCI_IWCR_READ; + } + + /* IO resources */ + res = &hose->io_resource; + if (!res) { + printk(KERN_ERR "%s: Didn't find IO resources\n", __FILE__); + return; + } + pr_debug(".io_resource = %pr .io_base_phys=0x%pa\n", + res, &hose->io_base_phys); + out_be32(&pci_regs->iw2btar, + MPC52xx_PCI_IWBTAR_TRANSLATION(hose->io_base_phys, + res->start, + resource_size(res))); + iwcr2 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_IO; + + /* Set all the IWCR fields at once; they're in the same reg */ + out_be32(&pci_regs->iwcr, MPC52xx_PCI_IWCR_PACK(iwcr0, iwcr1, iwcr2)); + + /* Map IMMR onto PCI bus */ + pci_phys &= 0xfffc0000; /* bar0 has only 14 significant bits */ + out_be32(&pci_regs->tbatr0, MPC52xx_PCI_TBATR_ENABLE | pci_phys); + out_be32(&pci_regs->bar0, PCI_BASE_ADDRESS_MEM_PREFETCH | pci_phys); + + /* Map memory onto PCI bus */ + out_be32(&pci_regs->tbatr1, MPC52xx_PCI_TBATR_ENABLE); + out_be32(&pci_regs->bar1, PCI_BASE_ADDRESS_MEM_PREFETCH); + + out_be32(&pci_regs->tcr, MPC52xx_PCI_TCR_LD | MPC52xx_PCI_TCR_WCT8); + + tmp = in_be32(&pci_regs->gscr); +#if 0 + /* Reset the exteral bus ( internal PCI controller is NOT reset ) */ + /* Not necessary and can be a bad thing if for example the bootloader + is displaying a splash screen or ... Just left here for + documentation purpose if anyone need it */ + out_be32(&pci_regs->gscr, tmp | MPC52xx_PCI_GSCR_PR); + udelay(50); +#endif + + /* Make sure the PCI bridge is out of reset */ + out_be32(&pci_regs->gscr, tmp & ~MPC52xx_PCI_GSCR_PR); +} + +static void +mpc52xx_pci_fixup_resources(struct pci_dev *dev) +{ + struct resource *res; + + pr_debug("%s() %.4x:%.4x\n", __func__, dev->vendor, dev->device); + + /* We don't rely on boot loader for PCI and resets all + devices */ + pci_dev_for_each_resource(dev, res) { + if (res->end > res->start) { /* Only valid resources */ + res->end -= res->start; + res->start = 0; + res->flags |= IORESOURCE_UNSET; + } + } + + /* The PCI Host bridge of MPC52xx has a prefetch memory resource + fixed to 1Gb. Doesn't fit in the resource system so we remove it */ + if ( (dev->vendor == PCI_VENDOR_ID_MOTOROLA) && + ( dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200 + || dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200B) ) { + struct resource *res = &dev->resource[1]; + res->start = res->end = res->flags = 0; + } +} + +int __init +mpc52xx_add_bridge(struct device_node *node) +{ + int len; + struct mpc52xx_pci __iomem *pci_regs; + struct pci_controller *hose; + const int *bus_range; + struct resource rsrc; + + pr_debug("Adding MPC52xx PCI host bridge %pOF\n", node); + + pci_add_flags(PCI_REASSIGN_ALL_BUS); + + if (of_address_to_resource(node, 0, &rsrc) != 0) { + printk(KERN_ERR "Can't get %pOF resources\n", node); + return -EINVAL; + } + + bus_range = of_get_property(node, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get %pOF bus-range, assume bus 0\n", + node); + bus_range = NULL; + } + + /* There are some PCI quirks on the 52xx, register the hook to + * fix them. */ + ppc_md.pcibios_fixup_resources = mpc52xx_pci_fixup_resources; + + /* Alloc and initialize the pci controller. Values in the device + * tree are needed to configure the 52xx PCI controller. Rather + * than parse the tree here, let pci_process_bridge_OF_ranges() + * do it for us and extract the values after the fact */ + hose = pcibios_alloc_controller(node); + if (!hose) + return -ENOMEM; + + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + hose->ops = &mpc52xx_pci_ops; + + pci_regs = ioremap(rsrc.start, resource_size(&rsrc)); + if (!pci_regs) + return -ENOMEM; + + pci_process_bridge_OF_ranges(hose, node, 1); + + /* Finish setting up PCI using values obtained by + * pci_proces_bridge_OF_ranges */ + mpc52xx_pci_setup(hose, pci_regs, rsrc.start); + + return 0; +} + +void __init mpc52xx_setup_pci(void) +{ + struct device_node *pci; + + pci = of_find_matching_node(NULL, mpc52xx_pci_ids); + if (!pci) + return; + + mpc52xx_add_bridge(pci); + of_node_put(pci); +} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c new file mode 100644 index 0000000000..1e0a5e9644 --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -0,0 +1,519 @@ +/* + * + * Programmable Interrupt Controller functions for the Freescale MPC52xx. + * + * Copyright (C) 2008 Secret Lab Technologies Ltd. + * Copyright (C) 2006 bplan GmbH + * Copyright (C) 2004 Sylvain Munaut + * Copyright (C) 2003 Montavista Software, Inc + * + * Based on the code from the 2.4 kernel by + * Dale Farnsworth and Kent Borg. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ + +/* + * This is the device driver for the MPC5200 interrupt controller. + * + * hardware overview + * ----------------- + * The MPC5200 interrupt controller groups the all interrupt sources into + * three groups called 'critical', 'main', and 'peripheral'. The critical + * group has 3 irqs, External IRQ0, slice timer 0 irq, and wake from deep + * sleep. Main group include the other 3 external IRQs, slice timer 1, RTC, + * gpios, and the general purpose timers. Peripheral group contains the + * remaining irq sources from all of the on-chip peripherals (PSCs, Ethernet, + * USB, DMA, etc). + * + * virqs + * ----- + * The Linux IRQ subsystem requires that each irq source be assigned a + * system wide unique IRQ number starting at 1 (0 means no irq). Since + * systems can have multiple interrupt controllers, the virtual IRQ (virq) + * infrastructure lets each interrupt controller to define a local set + * of IRQ numbers and the virq infrastructure maps those numbers into + * a unique range of the global IRQ# space. + * + * To define a range of virq numbers for this controller, this driver first + * assigns a number to each of the irq groups (called the level 1 or L1 + * value). Within each group individual irq sources are also assigned a + * number, as defined by the MPC5200 user guide, and refers to it as the + * level 2 or L2 value. The virq number is determined by shifting up the + * L1 value by MPC52xx_IRQ_L1_OFFSET and ORing it with the L2 value. + * + * For example, the TMR0 interrupt is irq 9 in the main group. The + * virq for TMR0 is calculated by ((1 << MPC52xx_IRQ_L1_OFFSET) | 9). + * + * The observant reader will also notice that this driver defines a 4th + * interrupt group called 'bestcomm'. The bestcomm group isn't physically + * part of the MPC5200 interrupt controller, but it is used here to assign + * a separate virq number for each bestcomm task (since any of the 16 + * bestcomm tasks can cause the bestcomm interrupt to be raised). When a + * bestcomm interrupt occurs (peripheral group, irq 0) this driver determines + * which task needs servicing and returns the irq number for that task. This + * allows drivers which use bestcomm to define their own interrupt handlers. + * + * irq_chip structures + * ------------------- + * For actually manipulating IRQs (masking, enabling, clearing, etc) this + * driver defines four separate 'irq_chip' structures, one for the main + * group, one for the peripherals group, one for the bestcomm group and one + * for external interrupts. The irq_chip structures provide the hooks needed + * to manipulate each IRQ source, and since each group is has a separate set + * of registers for controlling the irq, it makes sense to divide up the + * hooks along those lines. + * + * You'll notice that there is not an irq_chip for the critical group and + * you'll also notice that there is an irq_chip defined for external + * interrupts even though there is no external interrupt group. The reason + * for this is that the four external interrupts are all managed with the same + * register even though one of the external IRQs is in the critical group and + * the other three are in the main group. For this reason it makes sense for + * the 4 external irqs to be managed using a separate set of hooks. The + * reason there is no crit irq_chip is that of the 3 irqs in the critical + * group, only external interrupt is actually support at this time by this + * driver and since external interrupt is the only one used, it can just + * be directed to make use of the external irq irq_chip. + * + * device tree bindings + * -------------------- + * The device tree bindings for this controller reflect the two level + * organization of irqs in the device. #interrupt-cells = <3> where the + * first cell is the group number [0..3], the second cell is the irq + * number in the group, and the third cell is the sense type (level/edge). + * For reference, the following is a list of the interrupt property values + * associated with external interrupt sources on the MPC5200 (just because + * it is non-obvious to determine what the interrupts property should be + * when reading the mpc5200 manual and it is a frequently asked question). + * + * External interrupts: + * <0 0 n> external irq0, n is sense (n=0: level high, + * <1 1 n> external irq1, n is sense n=1: edge rising, + * <1 2 n> external irq2, n is sense n=2: edge falling, + * <1 3 n> external irq3, n is sense n=3: level low) + */ +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include + +/* HW IRQ mapping */ +#define MPC52xx_IRQ_L1_CRIT (0) +#define MPC52xx_IRQ_L1_MAIN (1) +#define MPC52xx_IRQ_L1_PERP (2) +#define MPC52xx_IRQ_L1_SDMA (3) + +#define MPC52xx_IRQ_L1_OFFSET (6) +#define MPC52xx_IRQ_L1_MASK (0x00c0) +#define MPC52xx_IRQ_L2_MASK (0x003f) + +#define MPC52xx_IRQ_HIGHTESTHWIRQ (0xd0) + + +/* MPC5200 device tree match tables */ +static const struct of_device_id mpc52xx_pic_ids[] __initconst = { + { .compatible = "fsl,mpc5200-pic", }, + { .compatible = "mpc5200-pic", }, + {} +}; +static const struct of_device_id mpc52xx_sdma_ids[] __initconst = { + { .compatible = "fsl,mpc5200-bestcomm", }, + { .compatible = "mpc5200-bestcomm", }, + {} +}; + +static struct mpc52xx_intr __iomem *intr; +static struct mpc52xx_sdma __iomem *sdma; +static struct irq_domain *mpc52xx_irqhost = NULL; + +static unsigned char mpc52xx_map_senses[4] = { + IRQ_TYPE_LEVEL_HIGH, + IRQ_TYPE_EDGE_RISING, + IRQ_TYPE_EDGE_FALLING, + IRQ_TYPE_LEVEL_LOW, +}; + +/* Utility functions */ +static inline void io_be_setbit(u32 __iomem *addr, int bitno) +{ + out_be32(addr, in_be32(addr) | (1 << bitno)); +} + +static inline void io_be_clrbit(u32 __iomem *addr, int bitno) +{ + out_be32(addr, in_be32(addr) & ~(1 << bitno)); +} + +/* + * IRQ[0-3] interrupt irq_chip + */ +static void mpc52xx_extirq_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&intr->ctrl, 11 - l2irq); +} + +static void mpc52xx_extirq_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->ctrl, 11 - l2irq); +} + +static void mpc52xx_extirq_ack(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->ctrl, 27-l2irq); +} + +static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) +{ + u32 ctrl_reg, type; + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + void *handler = handle_level_irq; + + pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__, + (int) irqd_to_hwirq(d), l2irq, flow_type); + + switch (flow_type) { + case IRQF_TRIGGER_HIGH: type = 0; break; + case IRQF_TRIGGER_RISING: type = 1; handler = handle_edge_irq; break; + case IRQF_TRIGGER_FALLING: type = 2; handler = handle_edge_irq; break; + case IRQF_TRIGGER_LOW: type = 3; break; + default: + type = 0; + } + + ctrl_reg = in_be32(&intr->ctrl); + ctrl_reg &= ~(0x3 << (22 - (l2irq * 2))); + ctrl_reg |= (type << (22 - (l2irq * 2))); + out_be32(&intr->ctrl, ctrl_reg); + + irq_set_handler_locked(d, handler); + + return 0; +} + +static struct irq_chip mpc52xx_extirq_irqchip = { + .name = "MPC52xx External", + .irq_mask = mpc52xx_extirq_mask, + .irq_unmask = mpc52xx_extirq_unmask, + .irq_ack = mpc52xx_extirq_ack, + .irq_set_type = mpc52xx_extirq_set_type, +}; + +/* + * Main interrupt irq_chip + */ +static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type) +{ + return 0; /* Do nothing so that the sense mask will get updated */ +} + +static void mpc52xx_main_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->main_mask, 16 - l2irq); +} + +static void mpc52xx_main_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&intr->main_mask, 16 - l2irq); +} + +static struct irq_chip mpc52xx_main_irqchip = { + .name = "MPC52xx Main", + .irq_mask = mpc52xx_main_mask, + .irq_mask_ack = mpc52xx_main_mask, + .irq_unmask = mpc52xx_main_unmask, + .irq_set_type = mpc52xx_null_set_type, +}; + +/* + * Peripherals interrupt irq_chip + */ +static void mpc52xx_periph_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&intr->per_mask, 31 - l2irq); +} + +static void mpc52xx_periph_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&intr->per_mask, 31 - l2irq); +} + +static struct irq_chip mpc52xx_periph_irqchip = { + .name = "MPC52xx Peripherals", + .irq_mask = mpc52xx_periph_mask, + .irq_mask_ack = mpc52xx_periph_mask, + .irq_unmask = mpc52xx_periph_unmask, + .irq_set_type = mpc52xx_null_set_type, +}; + +/* + * SDMA interrupt irq_chip + */ +static void mpc52xx_sdma_mask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_setbit(&sdma->IntMask, l2irq); +} + +static void mpc52xx_sdma_unmask(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + io_be_clrbit(&sdma->IntMask, l2irq); +} + +static void mpc52xx_sdma_ack(struct irq_data *d) +{ + int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK; + out_be32(&sdma->IntPend, 1 << l2irq); +} + +static struct irq_chip mpc52xx_sdma_irqchip = { + .name = "MPC52xx SDMA", + .irq_mask = mpc52xx_sdma_mask, + .irq_unmask = mpc52xx_sdma_unmask, + .irq_ack = mpc52xx_sdma_ack, + .irq_set_type = mpc52xx_null_set_type, +}; + +/** + * mpc52xx_is_extirq - Returns true if hwirq number is for an external IRQ + */ +static int mpc52xx_is_extirq(int l1, int l2) +{ + return ((l1 == 0) && (l2 == 0)) || + ((l1 == 1) && (l2 >= 1) && (l2 <= 3)); +} + +/** + * mpc52xx_irqhost_xlate - translate virq# from device tree interrupts property + */ +static int mpc52xx_irqhost_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, + unsigned int *out_flags) +{ + int intrvect_l1; + int intrvect_l2; + int intrvect_type; + int intrvect_linux; + + if (intsize != 3) + return -1; + + intrvect_l1 = (int)intspec[0]; + intrvect_l2 = (int)intspec[1]; + intrvect_type = (int)intspec[2] & 0x3; + + intrvect_linux = (intrvect_l1 << MPC52xx_IRQ_L1_OFFSET) & + MPC52xx_IRQ_L1_MASK; + intrvect_linux |= intrvect_l2 & MPC52xx_IRQ_L2_MASK; + + *out_hwirq = intrvect_linux; + *out_flags = IRQ_TYPE_LEVEL_LOW; + if (mpc52xx_is_extirq(intrvect_l1, intrvect_l2)) + *out_flags = mpc52xx_map_senses[intrvect_type]; + + pr_debug("return %x, l1=%d, l2=%d\n", intrvect_linux, intrvect_l1, + intrvect_l2); + return 0; +} + +/** + * mpc52xx_irqhost_map - Hook to map from virq to an irq_chip structure + */ +static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t irq) +{ + int l1irq; + int l2irq; + struct irq_chip *irqchip; + void *hndlr; + int type; + u32 reg; + + l1irq = (irq & MPC52xx_IRQ_L1_MASK) >> MPC52xx_IRQ_L1_OFFSET; + l2irq = irq & MPC52xx_IRQ_L2_MASK; + + /* + * External IRQs are handled differently by the hardware so they are + * handled by a dedicated irq_chip structure. + */ + if (mpc52xx_is_extirq(l1irq, l2irq)) { + reg = in_be32(&intr->ctrl); + type = mpc52xx_map_senses[(reg >> (22 - l2irq * 2)) & 0x3]; + if ((type == IRQ_TYPE_EDGE_FALLING) || + (type == IRQ_TYPE_EDGE_RISING)) + hndlr = handle_edge_irq; + else + hndlr = handle_level_irq; + + irq_set_chip_and_handler(virq, &mpc52xx_extirq_irqchip, hndlr); + pr_debug("%s: External IRQ%i virq=%x, hw=%x. type=%x\n", + __func__, l2irq, virq, (int)irq, type); + return 0; + } + + /* It is an internal SOC irq. Choose the correct irq_chip */ + switch (l1irq) { + case MPC52xx_IRQ_L1_MAIN: irqchip = &mpc52xx_main_irqchip; break; + case MPC52xx_IRQ_L1_PERP: irqchip = &mpc52xx_periph_irqchip; break; + case MPC52xx_IRQ_L1_SDMA: irqchip = &mpc52xx_sdma_irqchip; break; + case MPC52xx_IRQ_L1_CRIT: + pr_warn("%s: Critical IRQ #%d is unsupported! Nopping it.\n", + __func__, l2irq); + irq_set_chip(virq, &no_irq_chip); + return 0; + } + + irq_set_chip_and_handler(virq, irqchip, handle_level_irq); + pr_debug("%s: virq=%x, l1=%i, l2=%i\n", __func__, virq, l1irq, l2irq); + + return 0; +} + +static const struct irq_domain_ops mpc52xx_irqhost_ops = { + .xlate = mpc52xx_irqhost_xlate, + .map = mpc52xx_irqhost_map, +}; + +/** + * mpc52xx_init_irq - Initialize and register with the virq subsystem + * + * Hook for setting up IRQs on an mpc5200 system. A pointer to this function + * is to be put into the machine definition structure. + * + * This function searches the device tree for an MPC5200 interrupt controller, + * initializes it, and registers it with the virq subsystem. + */ +void __init mpc52xx_init_irq(void) +{ + u32 intr_ctrl; + struct device_node *picnode; + struct device_node *np; + + /* Remap the necessary zones */ + picnode = of_find_matching_node(NULL, mpc52xx_pic_ids); + intr = of_iomap(picnode, 0); + if (!intr) + panic(__FILE__ ": find_and_map failed on 'mpc5200-pic'. " + "Check node !"); + + np = of_find_matching_node(NULL, mpc52xx_sdma_ids); + sdma = of_iomap(np, 0); + of_node_put(np); + if (!sdma) + panic(__FILE__ ": find_and_map failed on 'mpc5200-bestcomm'. " + "Check node !"); + + pr_debug("MPC5200 IRQ controller mapped to 0x%p\n", intr); + + /* Disable all interrupt sources. */ + out_be32(&sdma->IntPend, 0xffffffff); /* 1 means clear pending */ + out_be32(&sdma->IntMask, 0xffffffff); /* 1 means disabled */ + out_be32(&intr->per_mask, 0x7ffffc00); /* 1 means disabled */ + out_be32(&intr->main_mask, 0x00010fff); /* 1 means disabled */ + intr_ctrl = in_be32(&intr->ctrl); + intr_ctrl &= 0x00ff0000; /* Keeps IRQ[0-3] config */ + intr_ctrl |= 0x0f000000 | /* clear IRQ 0-3 */ + 0x00001000 | /* MEE master external enable */ + 0x00000000 | /* 0 means disable IRQ 0-3 */ + 0x00000001; /* CEb route critical normally */ + out_be32(&intr->ctrl, intr_ctrl); + + /* Zero a bunch of the priority settings. */ + out_be32(&intr->per_pri1, 0); + out_be32(&intr->per_pri2, 0); + out_be32(&intr->per_pri3, 0); + out_be32(&intr->main_pri1, 0); + out_be32(&intr->main_pri2, 0); + + /* + * As last step, add an irq host to translate the real + * hw irq information provided by the ofw to linux virq + */ + mpc52xx_irqhost = irq_domain_add_linear(picnode, + MPC52xx_IRQ_HIGHTESTHWIRQ, + &mpc52xx_irqhost_ops, NULL); + + if (!mpc52xx_irqhost) + panic(__FILE__ ": Cannot allocate the IRQ host\n"); + + irq_set_default_host(mpc52xx_irqhost); + + pr_info("MPC52xx PIC is up and running!\n"); +} + +/** + * mpc52xx_get_irq - Get pending interrupt number hook function + * + * Called by the interrupt handler to determine what IRQ handler needs to be + * executed. + * + * Status of pending interrupts is determined by reading the encoded status + * register. The encoded status register has three fields; one for each of the + * types of interrupts defined by the controller - 'critical', 'main' and + * 'peripheral'. This function reads the status register and returns the IRQ + * number associated with the highest priority pending interrupt. 'Critical' + * interrupts have the highest priority, followed by 'main' interrupts, and + * then 'peripheral'. + * + * The mpc5200 interrupt controller can be configured to boost the priority + * of individual 'peripheral' interrupts. If this is the case then a special + * value will appear in either the crit or main fields indicating a high + * or medium priority peripheral irq has occurred. + * + * This function checks each of the 3 irq request fields and returns the + * first pending interrupt that it finds. + * + * This function also identifies a 4th type of interrupt; 'bestcomm'. Each + * bestcomm DMA task can raise the bestcomm peripheral interrupt. When this + * occurs at task-specific IRQ# is decoded so that each task can have its + * own IRQ handler. + */ +unsigned int mpc52xx_get_irq(void) +{ + u32 status; + int irq; + + status = in_be32(&intr->enc_status); + if (status & 0x00000400) { /* critical */ + irq = (status >> 8) & 0x3; + if (irq == 2) /* high priority peripheral */ + goto peripheral; + irq |= (MPC52xx_IRQ_L1_CRIT << MPC52xx_IRQ_L1_OFFSET); + } else if (status & 0x00200000) { /* main */ + irq = (status >> 16) & 0x1f; + if (irq == 4) /* low priority peripheral */ + goto peripheral; + irq |= (MPC52xx_IRQ_L1_MAIN << MPC52xx_IRQ_L1_OFFSET); + } else if (status & 0x20000000) { /* peripheral */ + peripheral: + irq = (status >> 24) & 0x1f; + if (irq == 0) { /* bestcomm */ + status = in_be32(&sdma->IntPend); + irq = ffs(status) - 1; + irq |= (MPC52xx_IRQ_L1_SDMA << MPC52xx_IRQ_L1_OFFSET); + } else { + irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET); + } + } else { + return 0; + } + + return irq_linear_revmap(mpc52xx_irqhost, irq); +} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c new file mode 100644 index 0000000000..f0c31ae15d --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include +#include +#include + +/* these are defined in mpc52xx_sleep.S, and only used here */ +extern void mpc52xx_deep_sleep(void __iomem *sram, void __iomem *sdram_regs, + struct mpc52xx_cdm __iomem *, struct mpc52xx_intr __iomem*); +extern void mpc52xx_ds_sram(void); +extern const long mpc52xx_ds_sram_size; +extern void mpc52xx_ds_cached(void); +extern const long mpc52xx_ds_cached_size; + +static void __iomem *mbar; +static void __iomem *sdram; +static struct mpc52xx_cdm __iomem *cdm; +static struct mpc52xx_intr __iomem *intr; +static struct mpc52xx_gpio_wkup __iomem *gpiow; +static void __iomem *sram; +static int sram_size; + +struct mpc52xx_suspend mpc52xx_suspend; + +static int mpc52xx_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + return 1; + default: + return 0; + } +} + +int mpc52xx_set_wakeup_gpio(u8 pin, u8 level) +{ + u16 tmp; + + /* enable gpio */ + out_8(&gpiow->wkup_gpioe, in_8(&gpiow->wkup_gpioe) | (1 << pin)); + /* set as input */ + out_8(&gpiow->wkup_ddr, in_8(&gpiow->wkup_ddr) & ~(1 << pin)); + /* enable deep sleep interrupt */ + out_8(&gpiow->wkup_inten, in_8(&gpiow->wkup_inten) | (1 << pin)); + /* low/high level creates wakeup interrupt */ + tmp = in_be16(&gpiow->wkup_itype); + tmp &= ~(0x3 << (pin * 2)); + tmp |= (!level + 1) << (pin * 2); + out_be16(&gpiow->wkup_itype, tmp); + /* master enable */ + out_8(&gpiow->wkup_maste, 1); + + return 0; +} + +int mpc52xx_pm_prepare(void) +{ + struct device_node *np; + static const struct of_device_id immr_ids[] = { + { .compatible = "fsl,mpc5200-immr", }, + { .compatible = "fsl,mpc5200b-immr", }, + { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */ + { .type = "builtin", .compatible = "mpc5200", }, /* efika */ + {} + }; + struct resource res; + + /* map the whole register space */ + np = of_find_matching_node(NULL, immr_ids); + + if (of_address_to_resource(np, 0, &res)) { + pr_err("mpc52xx_pm_prepare(): could not get IMMR address\n"); + of_node_put(np); + return -ENOSYS; + } + + mbar = ioremap(res.start, 0xc000); /* we should map whole region including SRAM */ + + of_node_put(np); + if (!mbar) { + pr_err("mpc52xx_pm_prepare(): could not map registers\n"); + return -ENOSYS; + } + /* these offsets are from mpc5200 users manual */ + sdram = mbar + 0x100; + cdm = mbar + 0x200; + intr = mbar + 0x500; + gpiow = mbar + 0xc00; + sram = mbar + 0x8000; /* Those will be handled by the */ + sram_size = 0x4000; /* bestcomm driver soon */ + + /* call board suspend code, if applicable */ + if (mpc52xx_suspend.board_suspend_prepare) + mpc52xx_suspend.board_suspend_prepare(mbar); + else { + printk(KERN_ALERT "%s: %i don't know how to wake up the board\n", + __func__, __LINE__); + goto out_unmap; + } + + return 0; + + out_unmap: + iounmap(mbar); + return -ENOSYS; +} + + +char saved_sram[0x4000]; + +int mpc52xx_pm_enter(suspend_state_t state) +{ + u32 clk_enables; + u32 msr, hid0; + u32 intr_main_mask; + void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500; + unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size; + char saved_0x500[0x600-0x500]; + + if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500))) + return -ENOMEM; + + /* disable all interrupts in PIC */ + intr_main_mask = in_be32(&intr->main_mask); + out_be32(&intr->main_mask, intr_main_mask | 0x1ffff); + + /* don't let DEC expire any time soon */ + mtspr(SPRN_DEC, 0x7fffffff); + + /* save SRAM */ + memcpy(saved_sram, sram, sram_size); + + /* copy low level suspend code to sram */ + memcpy(sram, mpc52xx_ds_sram, mpc52xx_ds_sram_size); + + out_8(&cdm->ccs_sleep_enable, 1); + out_8(&cdm->osc_sleep_enable, 1); + out_8(&cdm->ccs_qreq_test, 1); + + /* disable all but SDRAM and bestcomm (SRAM) clocks */ + clk_enables = in_be32(&cdm->clk_enables); + out_be32(&cdm->clk_enables, clk_enables & 0x00088000); + + /* disable power management */ + msr = mfmsr(); + mtmsr(msr & ~MSR_POW); + + /* enable sleep mode, disable others */ + hid0 = mfspr(SPRN_HID0); + mtspr(SPRN_HID0, (hid0 & ~(HID0_DOZE | HID0_NAP | HID0_DPM)) | HID0_SLEEP); + + /* save original, copy our irq handler, flush from dcache and invalidate icache */ + memcpy(saved_0x500, irq_0x500, mpc52xx_ds_cached_size); + memcpy(irq_0x500, mpc52xx_ds_cached, mpc52xx_ds_cached_size); + flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop); + + /* call low-level sleep code */ + mpc52xx_deep_sleep(sram, sdram, cdm, intr); + + /* restore original irq handler */ + memcpy(irq_0x500, saved_0x500, mpc52xx_ds_cached_size); + flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop); + + /* restore old power mode */ + mtmsr(msr & ~MSR_POW); + mtspr(SPRN_HID0, hid0); + mtmsr(msr); + + out_be32(&cdm->clk_enables, clk_enables); + out_8(&cdm->ccs_sleep_enable, 0); + out_8(&cdm->osc_sleep_enable, 0); + + /* restore SRAM */ + memcpy(sram, saved_sram, sram_size); + + /* reenable interrupts in PIC */ + out_be32(&intr->main_mask, intr_main_mask); + + return 0; +} + +void mpc52xx_pm_finish(void) +{ + /* call board resume code */ + if (mpc52xx_suspend.board_resume_finish) + mpc52xx_suspend.board_resume_finish(mbar); + + iounmap(mbar); +} + +static const struct platform_suspend_ops mpc52xx_pm_ops = { + .valid = mpc52xx_pm_valid, + .prepare = mpc52xx_pm_prepare, + .enter = mpc52xx_pm_enter, + .finish = mpc52xx_pm_finish, +}; + +int __init mpc52xx_pm_init(void) +{ + suspend_set_ops(&mpc52xx_pm_ops); + return 0; +} diff --git a/arch/powerpc/platforms/52xx/mpc52xx_sleep.S b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S new file mode 100644 index 0000000000..a66eb311b6 --- /dev/null +++ b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + + +.text + +_GLOBAL(mpc52xx_deep_sleep) +mpc52xx_deep_sleep: /* args r3-r6: SRAM, SDRAM regs, CDM regs, INTR regs */ + + /* enable interrupts */ + mfmsr r7 + ori r7, r7, 0x8000 /* EE */ + mtmsr r7 + sync; isync; + + li r10, 0 /* flag that irq handler sets */ + + /* enable tmr7 (or any other) interrupt */ + lwz r8, 0x14(r6) /* intr->main_mask */ + ori r8, r8, 0x1 + xori r8, r8, 0x1 + stw r8, 0x14(r6) + sync + + /* emulate tmr7 interrupt */ + li r8, 0x1 + stw r8, 0x40(r6) /* intr->main_emulate */ + sync + + /* wait for it to happen */ +1: + cmpi cr0, r10, 1 + bne cr0, 1b + + /* lock icache */ + mfspr r10, SPRN_HID0 + ori r10, r10, 0x2000 + sync; isync; + mtspr SPRN_HID0, r10 + sync; isync; + + + mflr r9 /* save LR */ + + /* jump to sram */ + mtlr r3 + blrl + + mtlr r9 /* restore LR */ + + /* unlock icache */ + mfspr r10, SPRN_HID0 + ori r10, r10, 0x2000 + xori r10, r10, 0x2000 + sync; isync; + mtspr SPRN_HID0, r10 + sync; isync; + + + /* return to C code */ + blr + + +_GLOBAL(mpc52xx_ds_sram) +mpc52xx_ds_sram: + /* put SDRAM into self-refresh */ + lwz r8, 0x4(r4) /* sdram->ctrl */ + + oris r8, r8, 0x8000 /* mode_en */ + stw r8, 0x4(r4) + sync + + ori r8, r8, 0x0002 /* soft_pre */ + stw r8, 0x4(r4) + sync + xori r8, r8, 0x0002 + + xoris r8, r8, 0x8000 /* !mode_en */ + stw r8, 0x4(r4) + sync + + oris r8, r8, 0x5000 + xoris r8, r8, 0x4000 /* ref_en !cke */ + stw r8, 0x4(r4) + sync + + /* disable SDRAM clock */ + lwz r8, 0x14(r5) /* cdm->clkenable */ + ori r8, r8, 0x0008 + xori r8, r8, 0x0008 + stw r8, 0x14(r5) + sync + + + /* put mpc5200 to sleep */ + mfmsr r10 + oris r10, r10, 0x0004 /* POW = 1 */ + sync; isync; + mtmsr r10 + sync; isync; + + + /* enable clock */ + lwz r8, 0x14(r5) + ori r8, r8, 0x0008 + stw r8, 0x14(r5) + sync + + /* get ram out of self-refresh */ + lwz r8, 0x4(r4) + oris r8, r8, 0x5000 /* cke ref_en */ + stw r8, 0x4(r4) + sync + + blr +_GLOBAL(mpc52xx_ds_sram_size) +mpc52xx_ds_sram_size: + .long $-mpc52xx_ds_sram + + +/* ### interrupt handler for wakeup from deep-sleep ### */ +_GLOBAL(mpc52xx_ds_cached) +mpc52xx_ds_cached: + mtspr SPRN_SPRG0, r7 + mtspr SPRN_SPRG1, r8 + + /* disable emulated interrupt */ + mfspr r7, 311 /* MBAR */ + addi r7, r7, 0x540 /* intr->main_emul */ + li r8, 0 + stw r8, 0(r7) + sync + dcbf 0, r7 + + /* acknowledge wakeup, so CCS releases power pown */ + mfspr r7, 311 /* MBAR */ + addi r7, r7, 0x524 /* intr->enc_status */ + lwz r8, 0(r7) + ori r8, r8, 0x0400 + stw r8, 0(r7) + sync + dcbf 0, r7 + + /* flag - we handled the interrupt */ + li r10, 1 + + mfspr r8, SPRN_SPRG1 + mfspr r7, SPRN_SPRG0 + + rfi +_GLOBAL(mpc52xx_ds_cached_size) +mpc52xx_ds_cached_size: + .long $-mpc52xx_ds_cached diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig new file mode 100644 index 0000000000..1824536cf6 --- /dev/null +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +menuconfig PPC_82xx + bool "82xx-based boards (PQ II)" + depends on PPC_BOOK3S_32 + select FSL_SOC + +if PPC_82xx + +config EP8248E + bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)" + select CPM2 + select PPC_INDIRECT_PCI if PCI + select PHYLIB if NETDEVICES + select MDIO_BITBANG if PHYLIB + help + This enables support for the Embedded Planet EP8248E board. + + This board is also resold by Freescale as the QUICCStart + MPC8248 Evaluation System and/or the CWH-PPC-8248N-VE. + +config MGCOGE + bool "Keymile MGCOGE" + select CPM2 + select PPC_INDIRECT_PCI if PCI + help + This enables support for the Keymile MGCOGE board. + +endif diff --git a/arch/powerpc/platforms/82xx/Makefile b/arch/powerpc/platforms/82xx/Makefile new file mode 100644 index 0000000000..4fa43a5cd5 --- /dev/null +++ b/arch/powerpc/platforms/82xx/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PowerPC 82xx linux kernel. +# +obj-$(CONFIG_CPM2) += pq2.o +obj-$(CONFIG_EP8248E) += ep8248e.o +obj-$(CONFIG_MGCOGE) += km82xx.o diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c new file mode 100644 index 0000000000..3dc65ce1f1 --- /dev/null +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Embedded Planet EP8248E support + * + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "pq2.h" + +static u8 __iomem *ep8248e_bcsr; +static struct device_node *ep8248e_bcsr_node; + +#define BCSR7_SCC2_ENABLE 0x10 + +#define BCSR8_PHY1_ENABLE 0x80 +#define BCSR8_PHY1_POWER 0x40 +#define BCSR8_PHY2_ENABLE 0x20 +#define BCSR8_PHY2_POWER 0x10 +#define BCSR8_MDIO_READ 0x04 +#define BCSR8_MDIO_CLOCK 0x02 +#define BCSR8_MDIO_DATA 0x01 + +#define BCSR9_USB_ENABLE 0x80 +#define BCSR9_USB_POWER 0x40 +#define BCSR9_USB_HOST 0x20 +#define BCSR9_USB_FULL_SPEED_TARGET 0x10 + +static void __init ep8248e_pic_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic"); + if (!np) { + printk(KERN_ERR "PIC init: can not find cpm-pic node\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); +} + +static void ep8248e_set_mdc(struct mdiobb_ctrl *ctrl, int level) +{ + if (level) + setbits8(&ep8248e_bcsr[8], BCSR8_MDIO_CLOCK); + else + clrbits8(&ep8248e_bcsr[8], BCSR8_MDIO_CLOCK); + + /* Read back to flush the write. */ + in_8(&ep8248e_bcsr[8]); +} + +static void ep8248e_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output) +{ + if (output) + clrbits8(&ep8248e_bcsr[8], BCSR8_MDIO_READ); + else + setbits8(&ep8248e_bcsr[8], BCSR8_MDIO_READ); + + /* Read back to flush the write. */ + in_8(&ep8248e_bcsr[8]); +} + +static void ep8248e_set_mdio_data(struct mdiobb_ctrl *ctrl, int data) +{ + if (data) + setbits8(&ep8248e_bcsr[8], BCSR8_MDIO_DATA); + else + clrbits8(&ep8248e_bcsr[8], BCSR8_MDIO_DATA); + + /* Read back to flush the write. */ + in_8(&ep8248e_bcsr[8]); +} + +static int ep8248e_get_mdio_data(struct mdiobb_ctrl *ctrl) +{ + return in_8(&ep8248e_bcsr[8]) & BCSR8_MDIO_DATA; +} + +static const struct mdiobb_ops ep8248e_mdio_ops = { + .set_mdc = ep8248e_set_mdc, + .set_mdio_dir = ep8248e_set_mdio_dir, + .set_mdio_data = ep8248e_set_mdio_data, + .get_mdio_data = ep8248e_get_mdio_data, + .owner = THIS_MODULE, +}; + +static struct mdiobb_ctrl ep8248e_mdio_ctrl = { + .ops = &ep8248e_mdio_ops, +}; + +static int ep8248e_mdio_probe(struct platform_device *ofdev) +{ + struct mii_bus *bus; + struct resource res; + struct device_node *node; + int ret; + + node = of_get_parent(ofdev->dev.of_node); + of_node_put(node); + if (node != ep8248e_bcsr_node) + return -ENODEV; + + ret = of_address_to_resource(ofdev->dev.of_node, 0, &res); + if (ret) + return ret; + + bus = alloc_mdio_bitbang(&ep8248e_mdio_ctrl); + if (!bus) + return -ENOMEM; + + bus->name = "ep8248e-mdio-bitbang"; + bus->parent = &ofdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); + + ret = of_mdiobus_register(bus, ofdev->dev.of_node); + if (ret) + goto err_free_bus; + + return 0; +err_free_bus: + free_mdio_bitbang(bus); + return ret; +} + +static const struct of_device_id ep8248e_mdio_match[] = { + { + .compatible = "fsl,ep8248e-mdio-bitbang", + }, + {}, +}; + +static struct platform_driver ep8248e_mdio_driver = { + .driver = { + .name = "ep8248e-mdio-bitbang", + .of_match_table = ep8248e_mdio_match, + .suppress_bind_attrs = true, + }, + .probe = ep8248e_mdio_probe, +}; + +struct cpm_pin { + int port, pin, flags; +}; + +static __initdata struct cpm_pin ep8248e_pins[] = { + /* SMC1 */ + {2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* SCC1 */ + {2, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* I2C */ + {4, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {4, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + + /* USB */ + {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ep8248e_pins); i++) { + const struct cpm_pin *pin = &ep8248e_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_TX); /* USB */ + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK11, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX); +} + +static void __init ep8248e_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("ep8248e_setup_arch()", 0); + + cpm2_reset(); + + /* When this is set, snooping CPM DMA from RAM causes + * machine checks. See erratum SIU18. + */ + clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP); + + ep8248e_bcsr_node = + of_find_compatible_node(NULL, NULL, "fsl,ep8248e-bcsr"); + if (!ep8248e_bcsr_node) { + printk(KERN_ERR "No bcsr in device tree\n"); + return; + } + + ep8248e_bcsr = of_iomap(ep8248e_bcsr_node, 0); + if (!ep8248e_bcsr) { + printk(KERN_ERR "Cannot map BCSR registers\n"); + of_node_put(ep8248e_bcsr_node); + ep8248e_bcsr_node = NULL; + return; + } + + setbits8(&ep8248e_bcsr[7], BCSR7_SCC2_ENABLE); + setbits8(&ep8248e_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER | + BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER); + + init_ioports(); + + if (ppc_md.progress) + ppc_md.progress("ep8248e_setup_arch(), finish", 0); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + { .compatible = "fsl,ep8248e-bcsr", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + if (IS_ENABLED(CONFIG_MDIO_BITBANG)) + platform_driver_register(&ep8248e_mdio_driver); + + return 0; +} +machine_device_initcall(ep8248e, declare_of_platform_devices); + +define_machine(ep8248e) +{ + .name = "Embedded Planet EP8248E", + .compatible = "fsl,ep8248e", + .setup_arch = ep8248e_setup_arch, + .init_IRQ = ep8248e_pic_init, + .get_irq = cpm2_get_irq, + .restart = pq2_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c new file mode 100644 index 0000000000..c86da3f2b7 --- /dev/null +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Keymile km82xx support + * Copyright 2008-2011 DENX Software Engineering GmbH + * Author: Heiko Schocher + * + * based on code from: + * Copyright 2007 Freescale Semiconductor, Inc. + * Author: Scott Wood + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "pq2.h" + +static void __init km82xx_pic_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, + "fsl,pq2-pic"); + if (!np) { + pr_err("PIC init: can not find cpm-pic node\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); +} + +struct cpm_pin { + int port, pin, flags; +}; + +static __initdata struct cpm_pin km82xx_pins[] = { + /* SMC1 */ + {2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* SMC2 */ + {0, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 9, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* SCC1 */ + {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + + /* SCC4 */ + {2, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {3, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC2 */ + {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + + {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* MDC */ + {0, 13, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, + +#if defined(CONFIG_I2C_CPM) + /* I2C */ + {3, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN}, + {3, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN}, +#endif + + /* USB */ + {0, 10, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, /* FULL_SPEED */ + {0, 11, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, /*/SLAVE */ + {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXN */ + {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXP */ + {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* /OE */ + {2, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXCLK */ + {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXP */ + {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXN */ + {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXD */ + + /* SPI */ + {3, 16, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MISO PD16 */ + {3, 17, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MOSI PD17 */ + {3, 18, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_CLK PD18 */ +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(km82xx_pins); i++) { + const struct cpm_pin *pin = &km82xx_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_smc_clk_setup(CPM_CLK_SMC2, CPM_BRG8); + cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_RTX); + cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK7, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK8, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX); + + /* Force USB FULL SPEED bit to '1' */ + setbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 10)); + /* clear USB_SLAVE */ + clrbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 11)); +} + +static void __init km82xx_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("km82xx_setup_arch()", 0); + + cpm2_reset(); + + /* When this is set, snooping CPM DMA from RAM causes + * machine checks. See erratum SIU18. + */ + clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP); + + init_ioports(); + + if (ppc_md.progress) + ppc_md.progress("km82xx_setup_arch(), finish", 0); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(km82xx, declare_of_platform_devices); + +define_machine(km82xx) +{ + .name = "Keymile km82xx", + .compatible = "keymile,km82xx", + .setup_arch = km82xx_setup_arch, + .init_IRQ = km82xx_pic_init, + .get_irq = cpm2_get_irq, + .restart = pq2_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c new file mode 100644 index 0000000000..391d72a2e0 --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Common PowerQUICC II code. + * + * Author: Scott Wood + * Copyright (c) 2007 Freescale Semiconductor + * + * Based on code by Vitaly Bordug + * pq2_restart fix by Wade Farnsworth + * Copyright (c) 2006 MontaVista Software, Inc. + */ + +#include + +#include +#include +#include + +#include + +#define RMR_CSRE 0x00000001 + +void __noreturn pq2_restart(char *cmd) +{ + local_irq_disable(); + setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE); + + /* Clear the ME,EE,IR & DR bits in MSR to cause checkstop */ + mtmsr(mfmsr() & ~(MSR_ME | MSR_EE | MSR_IR | MSR_DR)); + in_8(&cpm2_immr->im_clkrst.res[0]); + + panic("Restart failed\n"); +} +NOKPROBE_SYMBOL(pq2_restart) diff --git a/arch/powerpc/platforms/82xx/pq2.h b/arch/powerpc/platforms/82xx/pq2.h new file mode 100644 index 0000000000..902ef0bd49 --- /dev/null +++ b/arch/powerpc/platforms/82xx/pq2.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PQ2_H +#define _PQ2_H + +void __noreturn pq2_restart(char *cmd); + +#ifdef CONFIG_PCI +int pq2ads_pci_init_irq(void); +void pq2_init_pci(void); +#else +static inline int pq2ads_pci_init_irq(void) +{ + return 0; +} + +static inline void pq2_init_pci(void) +{ +} +#endif + +#endif diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig new file mode 100644 index 0000000000..d355ad4099 --- /dev/null +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0 +menuconfig PPC_83xx + bool "83xx-based boards" + depends on PPC_BOOK3S_32 + select PPC_UDBG_16550 + select HAVE_PCI + select FSL_PCI if PCI + select FSL_SOC + select IPIC + +if PPC_83xx + +config MPC830x_RDB + bool "Freescale MPC830x RDB and derivatives" + select DEFAULT_UIMAGE + select PPC_MPC831x + select FSL_GTM + help + This option enables support for the MPC8308 RDB and MPC8308 P1M boards. + +config MPC831x_RDB + bool "Freescale MPC831x RDB" + select DEFAULT_UIMAGE + select PPC_MPC831x + help + This option enables support for the MPC8313 RDB and MPC8315 RDB boards. + +config MPC832x_RDB + bool "Freescale MPC832x RDB" + select DEFAULT_UIMAGE + select PPC_MPC832x + help + This option enables support for the MPC8323 RDB board. + +config MPC834x_ITX + bool "Freescale MPC834x ITX" + select DEFAULT_UIMAGE + select PPC_MPC834x + help + This option enables support for the MPC 834x ITX evaluation board. + + Be aware that PCI initialization is the bootloader's + responsibility. + +config MPC836x_RDK + bool "Freescale/Logic MPC836x RDK" + select DEFAULT_UIMAGE + select FSL_GTM + select FSL_LBC + help + This option enables support for the MPC836x RDK Processor Board, + also known as ZOOM PowerQUICC Kit. + +config MPC837x_RDB + bool "Freescale MPC837x RDB/WLAN" + select DEFAULT_UIMAGE + select PPC_MPC837x + help + This option enables support for the MPC837x RDB and WLAN Boards. + +config ASP834x + bool "Analogue & Micro ASP 834x" + select PPC_MPC834x + help + This enables support for the Analogue & Micro ASP 83xx + board. + +config KMETER1 + bool "Keymile KMETER1" + select DEFAULT_UIMAGE + select QUICC_ENGINE + help + This enables support for the Keymile KMETER1 board. + + +endif + +# used for usb & gpio +config PPC_MPC831x + bool + +# used for math-emu +config PPC_MPC832x + bool + +# used for usb & gpio +config PPC_MPC834x + bool + +# used for usb & gpio +config PPC_MPC837x + bool diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile new file mode 100644 index 0000000000..6fc3dba943 --- /dev/null +++ b/arch/powerpc/platforms/83xx/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PowerPC 83xx linux kernel. +# +obj-y := misc.o +obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o +obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o +obj-$(CONFIG_MPC830x_RDB) += mpc830x_rdb.o +obj-$(CONFIG_MPC831x_RDB) += mpc831x_rdb.o +obj-$(CONFIG_MPC832x_RDB) += mpc832x_rdb.o +obj-$(CONFIG_MPC834x_ITX) += mpc834x_itx.o +obj-$(CONFIG_MPC836x_RDK) += mpc836x_rdk.o +obj-$(CONFIG_MPC837x_RDB) += mpc837x_rdb.o +obj-$(CONFIG_ASP834x) += asp834x.o +obj-$(CONFIG_KMETER1) += km83xx.o +obj-$(CONFIG_PPC_MPC831x) += usb_831x.o +obj-$(CONFIG_PPC_MPC834x) += usb_834x.o +obj-$(CONFIG_PPC_MPC837x) += usb_837x.o diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c new file mode 100644 index 0000000000..6870d0c34f --- /dev/null +++ b/arch/powerpc/platforms/83xx/asp834x.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/83xx/asp834x.c + * + * Analogue & Micro ASP8347 board specific routines + * clone of mpc834x_itx + * + * Copyright 2008 Codehermit + * + * Maintainer: Bryan O'Donoghue + */ + +#include +#include + +#include +#include +#include + +#include "mpc83xx.h" + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init asp834x_setup_arch(void) +{ + mpc83xx_setup_arch(); + mpc834x_usb_cfg(); +} + +machine_device_initcall(asp834x, mpc83xx_declare_of_platform_devices); + +define_machine(asp834x) { + .name = "ASP8347E", + .compatible = "analogue-and-micro,asp8347e", + .setup_arch = asp834x_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c new file mode 100644 index 0000000000..2b5d187d9b --- /dev/null +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008-2011 DENX Software Engineering GmbH + * Author: Heiko Schocher + * + * Description: + * Keymile 83xx platform specific routines. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc83xx.h" + +#define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */ + +static void __init quirk_mpc8360e_qe_enet10(void) +{ + /* + * handle mpc8360E Erratum QE_ENET10: + * RGMII AC values do not meet the specification + */ + uint svid = mfspr(SPRN_SVR); + struct device_node *np_par; + struct resource res; + void __iomem *base; + int ret; + + np_par = of_find_node_by_name(NULL, "par_io"); + if (np_par == NULL) { + pr_warn("%s couldn't find par_io node\n", __func__); + return; + } + /* Map Parallel I/O ports registers */ + ret = of_address_to_resource(np_par, 0, &res); + if (ret) { + pr_warn("%s couldn't map par_io registers\n", __func__); + goto out; + } + + base = ioremap(res.start, resource_size(&res)); + if (!base) + goto out; + + /* + * set output delay adjustments to default values according + * table 5 in Errata Rev. 5, 9/2011: + * + * write 0b01 to UCC1 bits 18:19 + * write 0b01 to UCC2 option 1 bits 4:5 + * write 0b01 to UCC2 option 2 bits 16:17 + */ + clrsetbits_be32((base + 0xa8), 0x0c00f000, 0x04005000); + + /* + * set output delay adjustments to default values according + * table 3-13 in Reference Manual Rev.3 05/2010: + * + * write 0b01 to UCC2 option 2 bits 16:17 + * write 0b0101 to UCC1 bits 20:23 + * write 0b0101 to UCC2 option 1 bits 24:27 + */ + clrsetbits_be32((base + 0xac), 0x0000cff0, 0x00004550); + + if (SVR_REV(svid) == 0x0021) { + /* + * UCC2 option 1: write 0b1010 to bits 24:27 + * at address IMMRBAR+0x14AC + */ + clrsetbits_be32((base + 0xac), 0x000000f0, 0x000000a0); + } else if (SVR_REV(svid) == 0x0020) { + /* + * UCC1: write 0b11 to bits 18:19 + * at address IMMRBAR+0x14A8 + */ + setbits32((base + 0xa8), 0x00003000); + + /* + * UCC2 option 1: write 0b11 to bits 4:5 + * at address IMMRBAR+0x14A8 + */ + setbits32((base + 0xa8), 0x0c000000); + + /* + * UCC2 option 2: write 0b11 to bits 16:17 + * at address IMMRBAR+0x14AC + */ + setbits32((base + 0xac), 0x0000c000); + } + iounmap(base); +out: + of_node_put(np_par); +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc83xx_km_setup_arch(void) +{ +#ifdef CONFIG_QUICC_ENGINE + struct device_node *np; +#endif + + mpc83xx_setup_arch(); + +#ifdef CONFIG_QUICC_ENGINE + np = of_find_node_by_name(NULL, "par_io"); + if (np != NULL) { + par_io_init(np); + of_node_put(np); + + for_each_node_by_name(np, "spi") + par_io_of_config(np); + + for_each_node_by_name(np, "ucc") + par_io_of_config(np); + + /* Only apply this quirk when par_io is available */ + np = of_find_compatible_node(NULL, "network", "ucc_geth"); + if (np != NULL) { + quirk_mpc8360e_qe_enet10(); + of_node_put(np); + } + } +#endif /* CONFIG_QUICC_ENGINE */ +} + +machine_device_initcall(mpc83xx_km, mpc83xx_declare_of_platform_devices); + +/* list of the supported boards */ +static char *board[] __initdata = { + "Keymile,KMETER1", + "Keymile,kmpbec8321", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc83xx_km_probe(void) +{ + int i = 0; + + while (board[i]) { + if (of_machine_is_compatible(board[i])) + break; + i++; + } + return (board[i] != NULL); +} + +define_machine(mpc83xx_km) { + .name = "mpc83xx-km-platform", + .probe = mpc83xx_km_probe, + .setup_arch = mpc83xx_km_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c new file mode 100644 index 0000000000..4d8fa9ed1a --- /dev/null +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU + * + * Copyright (c) 2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * I don't have specifications for the MCU firmware, I found this register + * and bits positions by the trial&error method. + */ +#define MCU_REG_CTRL 0x20 +#define MCU_CTRL_POFF 0x40 +#define MCU_CTRL_BTN 0x80 + +#define MCU_NUM_GPIO 2 + +struct mcu { + struct mutex lock; + struct i2c_client *client; + struct gpio_chip gc; + u8 reg_ctrl; +}; + +static struct mcu *glob_mcu; + +struct task_struct *shutdown_thread; +static int shutdown_thread_fn(void *data) +{ + int ret; + struct mcu *mcu = glob_mcu; + + while (!kthread_should_stop()) { + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + pr_err("MCU status reg read failed.\n"); + mcu->reg_ctrl = ret; + + + if (mcu->reg_ctrl & MCU_CTRL_BTN) { + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, + mcu->reg_ctrl & ~MCU_CTRL_BTN); + + ctrl_alt_del(); + } + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } + + return 0; +} + +static ssize_t show_status(struct device *d, + struct device_attribute *attr, char *buf) +{ + int ret; + struct mcu *mcu = glob_mcu; + + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + return -ENODEV; + mcu->reg_ctrl = ret; + + return sprintf(buf, "%02x\n", ret); +} +static DEVICE_ATTR(status, 0444, show_status, NULL); + +static void mcu_power_off(void) +{ + struct mcu *mcu = glob_mcu; + + pr_info("Sending power-off request to the MCU...\n"); + mutex_lock(&mcu->lock); + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, + mcu->reg_ctrl | MCU_CTRL_POFF); + mutex_unlock(&mcu->lock); +} + +static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct mcu *mcu = gpiochip_get_data(gc); + u8 bit = 1 << (4 + gpio); + + mutex_lock(&mcu->lock); + if (val) + mcu->reg_ctrl &= ~bit; + else + mcu->reg_ctrl |= bit; + + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl); + mutex_unlock(&mcu->lock); +} + +static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + mcu_gpio_set(gc, gpio, val); + return 0; +} + +static int mcu_gpiochip_add(struct mcu *mcu) +{ + struct device *dev = &mcu->client->dev; + struct gpio_chip *gc = &mcu->gc; + + gc->owner = THIS_MODULE; + gc->label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(dev)); + gc->can_sleep = 1; + gc->ngpio = MCU_NUM_GPIO; + gc->base = -1; + gc->set = mcu_gpio_set; + gc->direction_output = mcu_gpio_dir_out; + gc->parent = dev; + + return gpiochip_add_data(gc, mcu); +} + +static void mcu_gpiochip_remove(struct mcu *mcu) +{ + kfree(mcu->gc.label); + gpiochip_remove(&mcu->gc); +} + +static int mcu_probe(struct i2c_client *client) +{ + struct mcu *mcu; + int ret; + + mcu = kzalloc(sizeof(*mcu), GFP_KERNEL); + if (!mcu) + return -ENOMEM; + + mutex_init(&mcu->lock); + mcu->client = client; + i2c_set_clientdata(client, mcu); + + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + goto err; + mcu->reg_ctrl = ret; + + ret = mcu_gpiochip_add(mcu); + if (ret) + goto err; + + /* XXX: this is potentially racy, but there is no lock for pm_power_off */ + if (!pm_power_off) { + glob_mcu = mcu; + pm_power_off = mcu_power_off; + dev_info(&client->dev, "will provide power-off service\n"); + } + + if (device_create_file(&client->dev, &dev_attr_status)) + dev_err(&client->dev, + "couldn't create device file for status\n"); + + shutdown_thread = kthread_run(shutdown_thread_fn, NULL, + "mcu-i2c-shdn"); + + return 0; +err: + kfree(mcu); + return ret; +} + +static void mcu_remove(struct i2c_client *client) +{ + struct mcu *mcu = i2c_get_clientdata(client); + + kthread_stop(shutdown_thread); + + device_remove_file(&client->dev, &dev_attr_status); + + if (glob_mcu == mcu) { + pm_power_off = NULL; + glob_mcu = NULL; + } + + mcu_gpiochip_remove(mcu); + kfree(mcu); +} + +static const struct i2c_device_id mcu_ids[] = { + { "mcu-mpc8349emitx", }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, mcu_ids); + +static const struct of_device_id mcu_of_match_table[] = { + { .compatible = "fsl,mcu-mpc8349emitx", }, + { }, +}; + +static struct i2c_driver mcu_driver = { + .driver = { + .name = "mcu-mpc8349emitx", + .of_match_table = mcu_of_match_table, + }, + .probe = mcu_probe, + .remove = mcu_remove, + .id_table = mcu_ids, +}; + +module_i2c_driver(mcu_driver); + +MODULE_DESCRIPTION("Power Management and GPIO expander driver for " + "MPC8349E-mITX-compatible MCU"); +MODULE_AUTHOR("Anton Vorontsov "); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c new file mode 100644 index 0000000000..2fb2a85d13 --- /dev/null +++ b/arch/powerpc/platforms/83xx/misc.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * misc setup functions for MPC83xx + * + * Maintainer: Kumar Gala + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc83xx.h" + +static __be32 __iomem *restart_reg_base; + +static int __init mpc83xx_restart_init(void) +{ + /* map reset restart_reg_baseister space */ + restart_reg_base = ioremap(get_immrbase() + 0x900, 0xff); + + return 0; +} + +arch_initcall(mpc83xx_restart_init); + +void __noreturn mpc83xx_restart(char *cmd) +{ +#define RST_OFFSET 0x00000900 +#define RST_PROT_REG 0x00000018 +#define RST_CTRL_REG 0x0000001c + + local_irq_disable(); + + if (restart_reg_base) { + /* enable software reset "RSTE" */ + out_be32(restart_reg_base + (RST_PROT_REG >> 2), 0x52535445); + + /* set software hard reset */ + out_be32(restart_reg_base + (RST_CTRL_REG >> 2), 0x2); + } else { + printk (KERN_EMERG "Error: Restart registers not mapped, spinning!\n"); + } + + for (;;) ; +} + +long __init mpc83xx_time_init(void) +{ +#define SPCR_OFFSET 0x00000110 +#define SPCR_TBEN 0x00400000 + __be32 __iomem *spcr = ioremap(get_immrbase() + SPCR_OFFSET, 4); + __be32 tmp; + + tmp = in_be32(spcr); + out_be32(spcr, tmp | SPCR_TBEN); + + iounmap(spcr); + + return 0; +} + +void __init mpc83xx_ipic_init_IRQ(void) +{ + struct device_node *np; + + /* looking for fsl,pq2pro-pic which is asl compatible with fsl,ipic */ + np = of_find_compatible_node(NULL, NULL, "fsl,ipic"); + if (!np) + np = of_find_node_by_type(NULL, "ipic"); + if (!np) + return; + + ipic_init(np, 0); + + of_node_put(np); + + /* Initialize the default interrupt mapping priorities, + * in case the boot rom changed something on us. + */ + ipic_set_default_priority(); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .compatible = "simple-bus" }, + { .compatible = "gianfar" }, + { .compatible = "gpio-leds", }, + { .type = "qe", }, + { .compatible = "fsl,qe", }, + {}, +}; + +int __init mpc83xx_declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} + +#ifdef CONFIG_PCI +void __init mpc83xx_setup_pci(void) +{ + struct device_node *np; + + for_each_compatible_node(np, "pci", "fsl,mpc8349-pci") + mpc83xx_add_bridge(np); + for_each_compatible_node(np, "pci", "fsl,mpc8314-pcie") + mpc83xx_add_bridge(np); +} +#endif + +void __init mpc83xx_setup_arch(void) +{ + phys_addr_t immrbase = get_immrbase(); + int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M; + unsigned long va = fix_to_virt(FIX_IMMR_BASE); + + if (ppc_md.progress) + ppc_md.progress("mpc83xx_setup_arch()", 0); + + setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG); + update_bats(); +} + +int machine_check_83xx(struct pt_regs *regs) +{ + u32 mask = 1 << (31 - IPIC_MCP_WDT); + + if (!(regs->msr & SRR1_MCE_MCP) || !(ipic_get_mcp_status() & mask)) + return machine_check_generic(regs); + ipic_clear_mcp_status(mask); + + if (debugger_fault_handler(regs)) + return 1; + + die("Watchdog NMI Reset", regs, 0); + + return 1; +} diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c new file mode 100644 index 0000000000..534bb22748 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/83xx/mpc830x_rdb.c + * + * Description: MPC830x RDB board specific routines. + * This file is based on mpc831x_rdb.c + * + * Copyright (C) Freescale Semiconductor, Inc. 2009. All rights reserved. + * Copyright (C) 2010. Ilya Yanok, Emcraft Systems, yanok@emcraft.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include "mpc83xx.h" + +/* + * Setup the architecture + */ +static void __init mpc830x_rdb_setup_arch(void) +{ + mpc83xx_setup_arch(); + mpc831x_usb_cfg(); +} + +static const char *board[] __initdata = { + "MPC8308RDB", + "fsl,mpc8308rdb", + "denx,mpc8308_p1m", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc830x_rdb_probe(void) +{ + return of_device_compatible_match(of_root, board); +} + +machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices); + +define_machine(mpc830x_rdb) { + .name = "MPC830x RDB", + .probe = mpc830x_rdb_probe, + .setup_arch = mpc830x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c new file mode 100644 index 0000000000..7b901ab3b8 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/83xx/mpc831x_rdb.c + * + * Description: MPC831x RDB board specific routines. + * This file is based on mpc834x_sys.c + * Author: Lo Wlison + * + * Copyright (C) Freescale Semiconductor, Inc. 2006. All rights reserved. + */ + +#include +#include + +#include +#include +#include +#include + +#include "mpc83xx.h" + +/* + * Setup the architecture + */ +static void __init mpc831x_rdb_setup_arch(void) +{ + mpc83xx_setup_arch(); + mpc831x_usb_cfg(); +} + +static const char *board[] __initdata = { + "MPC8313ERDB", + "fsl,mpc8315erdb", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc831x_rdb_probe(void) +{ + return of_device_compatible_match(of_root, board); +} + +machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices); + +define_machine(mpc831x_rdb) { + .name = "MPC831x RDB", + .probe = mpc831x_rdb_probe, + .setup_arch = mpc831x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c new file mode 100644 index 0000000000..d523ce0f48 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/83xx/mpc832x_rdb.c + * + * Copyright (C) Freescale Semiconductor, Inc. 2007. All rights reserved. + * + * Description: + * MPC832x RDB board specific routines. + * This file is based on mpc832x_mds.c and mpc8313_rdb.c + * Author: Michael Barkowski + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mpc83xx.h" + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +#ifdef CONFIG_QUICC_ENGINE +static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, + struct spi_board_info *board_infos, + unsigned int num_board_infos, + void (*cs_control)(struct spi_device *dev, + bool on)) +{ + struct device_node *np; + unsigned int i = 0; + + for_each_compatible_node(np, type, compatible) { + int ret; + unsigned int j; + const void *prop; + struct resource res[2]; + struct platform_device *pdev; + struct fsl_spi_platform_data pdata = { + .cs_control = cs_control, + }; + + memset(res, 0, sizeof(res)); + + pdata.sysclk = sysclk; + + prop = of_get_property(np, "reg", NULL); + if (!prop) + goto err; + pdata.bus_num = *(u32 *)prop; + + prop = of_get_property(np, "cell-index", NULL); + if (prop) + i = *(u32 *)prop; + + prop = of_get_property(np, "mode", NULL); + if (prop && !strcmp(prop, "cpu-qe")) + pdata.flags = SPI_QE_CPU_MODE; + + for (j = 0; j < num_board_infos; j++) { + if (board_infos[j].bus_num == pdata.bus_num) + pdata.max_chipselect++; + } + + if (!pdata.max_chipselect) + continue; + + ret = of_address_to_resource(np, 0, &res[0]); + if (ret) + goto err; + + ret = of_irq_to_resource(np, 0, &res[1]); + if (ret <= 0) + goto err; + + pdev = platform_device_alloc("mpc83xx_spi", i); + if (!pdev) + goto err; + + ret = platform_device_add_data(pdev, &pdata, sizeof(pdata)); + if (ret) + goto unreg; + + ret = platform_device_add_resources(pdev, res, + ARRAY_SIZE(res)); + if (ret) + goto unreg; + + ret = platform_device_add(pdev); + if (ret) + goto unreg; + + goto next; +unreg: + platform_device_put(pdev); +err: + pr_err("%pOF: registration failed\n", np); +next: + i++; + } + + return i; +} + +static int __init fsl_spi_init(struct spi_board_info *board_infos, + unsigned int num_board_infos, + void (*cs_control)(struct spi_device *spi, + bool on)) +{ + u32 sysclk = -1; + int ret; + + /* SPI controller is either clocked from QE or SoC clock */ + sysclk = get_brgfreq(); + if (sysclk == -1) { + sysclk = fsl_get_sys_freq(); + if (sysclk == -1) + return -ENODEV; + } + + ret = of_fsl_spi_probe(NULL, "fsl,spi", sysclk, board_infos, + num_board_infos, cs_control); + if (!ret) + of_fsl_spi_probe("spi", "fsl_spi", sysclk, board_infos, + num_board_infos, cs_control); + + return spi_register_board_info(board_infos, num_board_infos); +} + +static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on) +{ + pr_debug("%s %d %d\n", __func__, spi_get_chipselect(spi, 0), on); + par_io_data_set(3, 13, on); +} + +static struct mmc_spi_platform_data mpc832x_mmc_pdata = { + .ocr_mask = MMC_VDD_33_34, +}; + +static struct spi_board_info mpc832x_spi_boardinfo = { + .bus_num = 0x4c0, + .chip_select = 0, + .max_speed_hz = 50000000, + .modalias = "mmc_spi", + .platform_data = &mpc832x_mmc_pdata, +}; + +static int __init mpc832x_spi_init(void) +{ + struct device_node *np; + + par_io_config_pin(3, 0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */ + par_io_config_pin(3, 1, 3, 0, 1, 0); /* SPI1 MISO, I/O */ + par_io_config_pin(3, 2, 3, 0, 1, 0); /* SPI1 CLK, I/O */ + par_io_config_pin(3, 3, 2, 0, 1, 0); /* SPI1 SEL, I */ + + par_io_config_pin(3, 13, 1, 0, 0, 0); /* !SD_CS, O */ + par_io_config_pin(3, 14, 2, 0, 0, 0); /* SD_INSERT, I */ + par_io_config_pin(3, 15, 2, 0, 0, 0); /* SD_PROTECT,I */ + + /* + * Don't bother with legacy stuff when device tree contains + * mmc-spi-slot node. + */ + np = of_find_compatible_node(NULL, NULL, "mmc-spi-slot"); + of_node_put(np); + if (np) + return 0; + return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control); +} +machine_device_initcall(mpc832x_rdb, mpc832x_spi_init); +#endif /* CONFIG_QUICC_ENGINE */ + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc832x_rdb_setup_arch(void) +{ +#if defined(CONFIG_QUICC_ENGINE) + struct device_node *np; +#endif + + mpc83xx_setup_arch(); + +#ifdef CONFIG_QUICC_ENGINE + if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) { + par_io_init(np); + of_node_put(np); + + for_each_node_by_name(np, "ucc") + par_io_of_config(np); + } +#endif /* CONFIG_QUICC_ENGINE */ +} + +machine_device_initcall(mpc832x_rdb, mpc83xx_declare_of_platform_devices); + +define_machine(mpc832x_rdb) { + .name = "MPC832x RDB", + .compatible = "MPC832xRDB", + .setup_arch = mpc832x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c new file mode 100644 index 0000000000..e45b98ff02 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/83xx/mpc834x_itx.c + * + * MPC834x ITX board specific routines + * + * Maintainer: Kumar Gala + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc83xx.h" + +static const struct of_device_id mpc834x_itx_ids[] __initconst = { + { .compatible = "fsl,pq2pro-localbus", }, + {}, +}; + +static int __init mpc834x_itx_declare_of_platform_devices(void) +{ + mpc83xx_declare_of_platform_devices(); + return of_platform_bus_probe(NULL, mpc834x_itx_ids, NULL); +} +machine_device_initcall(mpc834x_itx, mpc834x_itx_declare_of_platform_devices); + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc834x_itx_setup_arch(void) +{ + mpc83xx_setup_arch(); + + mpc834x_usb_cfg(); +} + +define_machine(mpc834x_itx) { + .name = "MPC834x ITX", + .compatible = "MPC834xMITX", + .setup_arch = mpc834x_itx_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c new file mode 100644 index 0000000000..1fc9d1235a --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC8360E-RDK board file. + * + * Copyright (c) 2006 Freescale Semiconductor, Inc. + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc83xx.h" + +machine_device_initcall(mpc836x_rdk, mpc83xx_declare_of_platform_devices); + +static void __init mpc836x_rdk_setup_arch(void) +{ + mpc83xx_setup_arch(); +} + +define_machine(mpc836x_rdk) { + .name = "MPC836x RDK", + .compatible = "fsl,mpc8360rdk", + .setup_arch = mpc836x_rdk_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c new file mode 100644 index 0000000000..39e78018dd --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/83xx/mpc837x_rdb.c + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * MPC837x RDB board specific routines + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "mpc83xx.h" + +static void __init mpc837x_rdb_sd_cfg(void) +{ + void __iomem *im; + + im = ioremap(get_immrbase(), 0x1000); + if (!im) { + WARN_ON(1); + return; + } + + /* + * On RDB boards (in contrast to MDS) USBB pins are used for SD only, + * so we can safely mux them away from the USB block. + */ + clrsetbits_be32(im + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USBB_MASK, + MPC837X_SICRL_SD); + clrsetbits_be32(im + MPC83XX_SICRH_OFFS, MPC837X_SICRH_SPI_MASK, + MPC837X_SICRH_SD); + iounmap(im); +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init mpc837x_rdb_setup_arch(void) +{ + mpc83xx_setup_arch(); + mpc837x_usb_cfg(); + mpc837x_rdb_sd_cfg(); +} + +machine_device_initcall(mpc837x_rdb, mpc83xx_declare_of_platform_devices); + +static const char * const board[] __initconst = { + "fsl,mpc8377rdb", + "fsl,mpc8378rdb", + "fsl,mpc8379rdb", + "fsl,mpc8377wlan", + NULL +}; + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init mpc837x_rdb_probe(void) +{ + return of_device_compatible_match(of_root, board); +} + +define_machine(mpc837x_rdb) { + .name = "MPC837x RDB/WLAN", + .probe = mpc837x_rdb_probe, + .setup_arch = mpc837x_rdb_setup_arch, + .discover_phbs = mpc83xx_setup_pci, + .init_IRQ = mpc83xx_ipic_init_IRQ, + .get_irq = ipic_get_irq, + .restart = mpc83xx_restart, + .time_init = mpc83xx_time_init, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h new file mode 100644 index 0000000000..0b8738a2b9 --- /dev/null +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __MPC83XX_H__ +#define __MPC83XX_H__ + +#include + +/* System Clock Control Register */ +#define MPC83XX_SCCR_OFFS 0xA08 +#define MPC83XX_SCCR_USB_MASK 0x00f00000 +#define MPC83XX_SCCR_USB_MPHCM_11 0x00c00000 +#define MPC83XX_SCCR_USB_MPHCM_01 0x00400000 +#define MPC83XX_SCCR_USB_MPHCM_10 0x00800000 +#define MPC83XX_SCCR_USB_DRCM_11 0x00300000 +#define MPC83XX_SCCR_USB_DRCM_01 0x00100000 +#define MPC83XX_SCCR_USB_DRCM_10 0x00200000 +#define MPC8315_SCCR_USB_MASK 0x00c00000 +#define MPC8315_SCCR_USB_DRCM_11 0x00c00000 +#define MPC8315_SCCR_USB_DRCM_01 0x00400000 +#define MPC837X_SCCR_USB_DRCM_11 0x00c00000 + +/* system i/o configuration register low */ +#define MPC83XX_SICRL_OFFS 0x114 +#define MPC834X_SICRL_USB_MASK 0x60000000 +#define MPC834X_SICRL_USB0 0x20000000 +#define MPC834X_SICRL_USB1 0x40000000 +#define MPC831X_SICRL_USB_MASK 0x00000c00 +#define MPC831X_SICRL_USB_ULPI 0x00000800 +#define MPC8315_SICRL_USB_MASK 0x000000fc +#define MPC8315_SICRL_USB_ULPI 0x00000054 +#define MPC837X_SICRL_USB_MASK 0xf0000000 +#define MPC837X_SICRL_USB_ULPI 0x50000000 +#define MPC837X_SICRL_USBB_MASK 0x30000000 +#define MPC837X_SICRL_SD 0x20000000 + +/* system i/o configuration register high */ +#define MPC83XX_SICRH_OFFS 0x118 +#define MPC8308_SICRH_USB_MASK 0x000c0000 +#define MPC8308_SICRH_USB_ULPI 0x00040000 +#define MPC834X_SICRH_USB_UTMI 0x00020000 +#define MPC831X_SICRH_USB_MASK 0x000000e0 +#define MPC831X_SICRH_USB_ULPI 0x000000a0 +#define MPC8315_SICRH_USB_MASK 0x0000ff00 +#define MPC8315_SICRH_USB_ULPI 0x00000000 +#define MPC837X_SICRH_SPI_MASK 0x00000003 +#define MPC837X_SICRH_SD 0x00000001 + +/* USB Control Register */ +#define FSL_USB2_CONTROL_OFFS 0x500 +#define CONTROL_UTMI_PHY_EN 0x00000200 +#define CONTROL_REFSEL_24MHZ 0x00000040 +#define CONTROL_REFSEL_48MHZ 0x00000080 +#define CONTROL_PHY_CLK_SEL_ULPI 0x00000400 +#define CONTROL_OTG_PORT 0x00000020 + +/* USB PORTSC Registers */ +#define FSL_USB2_PORTSC1_OFFS 0x184 +#define FSL_USB2_PORTSC2_OFFS 0x188 +#define PORTSCX_PTW_16BIT 0x10000000 +#define PORTSCX_PTS_UTMI 0x00000000 +#define PORTSCX_PTS_ULPI 0x80000000 + +/* + * Declaration for the various functions exported by the + * mpc83xx_* files. Mostly for use by mpc83xx_setup + */ + +extern void __noreturn mpc83xx_restart(char *cmd); +extern long mpc83xx_time_init(void); +int __init mpc837x_usb_cfg(void); +int __init mpc834x_usb_cfg(void); +int __init mpc831x_usb_cfg(void); +extern void mpc83xx_ipic_init_IRQ(void); + +#ifdef CONFIG_PCI +extern void mpc83xx_setup_pci(void); +#else +#define mpc83xx_setup_pci NULL +#endif + +extern int mpc83xx_declare_of_platform_devices(void); +extern void mpc83xx_setup_arch(void); + +#endif /* __MPC83XX_H__ */ diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S new file mode 100644 index 0000000000..bc6bd4d0ae --- /dev/null +++ b/arch/powerpc/platforms/83xx/suspend-asm.S @@ -0,0 +1,551 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Enter and leave deep sleep state on MPC83xx + * + * Copyright (c) 2006-2008 Freescale Semiconductor, Inc. + * Author: Scott Wood + */ + +#include +#include +#include +#include + +#define SS_MEMSAVE 0x00 /* First 8 bytes of RAM */ +#define SS_HID 0x08 /* 3 HIDs */ +#define SS_IABR 0x14 /* 2 IABRs */ +#define SS_IBCR 0x1c +#define SS_DABR 0x20 /* 2 DABRs */ +#define SS_DBCR 0x28 +#define SS_SP 0x2c +#define SS_SR 0x30 /* 16 segment registers */ +#define SS_R2 0x70 +#define SS_MSR 0x74 +#define SS_SDR1 0x78 +#define SS_LR 0x7c +#define SS_SPRG 0x80 /* 8 SPRGs */ +#define SS_DBAT 0xa0 /* 8 DBATs */ +#define SS_IBAT 0xe0 /* 8 IBATs */ +#define SS_TB 0x120 +#define SS_CR 0x128 +#define SS_GPREG 0x12c /* r12-r31 */ +#define STATE_SAVE_SIZE 0x17c + + .section .data + .align 5 + +mpc83xx_sleep_save_area: + .space STATE_SAVE_SIZE +immrbase: + .long 0 + + .section .text + .align 5 + + /* r3 = physical address of IMMR */ +_GLOBAL(mpc83xx_enter_deep_sleep) + lis r4, immrbase@ha + stw r3, immrbase@l(r4) + + /* The first 2 words of memory are used to communicate with the + * bootloader, to tell it how to resume. + * + * The first word is the magic number 0xf5153ae5, and the second + * is the pointer to mpc83xx_deep_resume. + * + * The original content of these two words is saved in SS_MEMSAVE. + */ + + lis r3, mpc83xx_sleep_save_area@h + ori r3, r3, mpc83xx_sleep_save_area@l + + lis r4, KERNELBASE@h + lwz r5, 0(r4) + lwz r6, 4(r4) + + stw r5, SS_MEMSAVE+0(r3) + stw r6, SS_MEMSAVE+4(r3) + + mfspr r5, SPRN_HID0 + mfspr r6, SPRN_HID1 + mfspr r7, SPRN_HID2 + + stw r5, SS_HID+0(r3) + stw r6, SS_HID+4(r3) + stw r7, SS_HID+8(r3) + + mfspr r4, SPRN_IABR + mfspr r5, SPRN_IABR2 + mfspr r6, SPRN_IBCR + mfspr r7, SPRN_DABR + mfspr r8, SPRN_DABR2 + mfspr r9, SPRN_DBCR + + stw r4, SS_IABR+0(r3) + stw r5, SS_IABR+4(r3) + stw r6, SS_IBCR(r3) + stw r7, SS_DABR+0(r3) + stw r8, SS_DABR+4(r3) + stw r9, SS_DBCR(r3) + + mfspr r4, SPRN_SPRG0 + mfspr r5, SPRN_SPRG1 + mfspr r6, SPRN_SPRG2 + mfspr r7, SPRN_SPRG3 + mfsdr1 r8 + + stw r4, SS_SPRG+0(r3) + stw r5, SS_SPRG+4(r3) + stw r6, SS_SPRG+8(r3) + stw r7, SS_SPRG+12(r3) + stw r8, SS_SDR1(r3) + + mfspr r4, SPRN_SPRG4 + mfspr r5, SPRN_SPRG5 + mfspr r6, SPRN_SPRG6 + mfspr r7, SPRN_SPRG7 + + stw r4, SS_SPRG+16(r3) + stw r5, SS_SPRG+20(r3) + stw r6, SS_SPRG+24(r3) + stw r7, SS_SPRG+28(r3) + + mfspr r4, SPRN_DBAT0U + mfspr r5, SPRN_DBAT0L + mfspr r6, SPRN_DBAT1U + mfspr r7, SPRN_DBAT1L + + stw r4, SS_DBAT+0x00(r3) + stw r5, SS_DBAT+0x04(r3) + stw r6, SS_DBAT+0x08(r3) + stw r7, SS_DBAT+0x0c(r3) + + mfspr r4, SPRN_DBAT2U + mfspr r5, SPRN_DBAT2L + mfspr r6, SPRN_DBAT3U + mfspr r7, SPRN_DBAT3L + + stw r4, SS_DBAT+0x10(r3) + stw r5, SS_DBAT+0x14(r3) + stw r6, SS_DBAT+0x18(r3) + stw r7, SS_DBAT+0x1c(r3) + + mfspr r4, SPRN_DBAT4U + mfspr r5, SPRN_DBAT4L + mfspr r6, SPRN_DBAT5U + mfspr r7, SPRN_DBAT5L + + stw r4, SS_DBAT+0x20(r3) + stw r5, SS_DBAT+0x24(r3) + stw r6, SS_DBAT+0x28(r3) + stw r7, SS_DBAT+0x2c(r3) + + mfspr r4, SPRN_DBAT6U + mfspr r5, SPRN_DBAT6L + mfspr r6, SPRN_DBAT7U + mfspr r7, SPRN_DBAT7L + + stw r4, SS_DBAT+0x30(r3) + stw r5, SS_DBAT+0x34(r3) + stw r6, SS_DBAT+0x38(r3) + stw r7, SS_DBAT+0x3c(r3) + + mfspr r4, SPRN_IBAT0U + mfspr r5, SPRN_IBAT0L + mfspr r6, SPRN_IBAT1U + mfspr r7, SPRN_IBAT1L + + stw r4, SS_IBAT+0x00(r3) + stw r5, SS_IBAT+0x04(r3) + stw r6, SS_IBAT+0x08(r3) + stw r7, SS_IBAT+0x0c(r3) + + mfspr r4, SPRN_IBAT2U + mfspr r5, SPRN_IBAT2L + mfspr r6, SPRN_IBAT3U + mfspr r7, SPRN_IBAT3L + + stw r4, SS_IBAT+0x10(r3) + stw r5, SS_IBAT+0x14(r3) + stw r6, SS_IBAT+0x18(r3) + stw r7, SS_IBAT+0x1c(r3) + + mfspr r4, SPRN_IBAT4U + mfspr r5, SPRN_IBAT4L + mfspr r6, SPRN_IBAT5U + mfspr r7, SPRN_IBAT5L + + stw r4, SS_IBAT+0x20(r3) + stw r5, SS_IBAT+0x24(r3) + stw r6, SS_IBAT+0x28(r3) + stw r7, SS_IBAT+0x2c(r3) + + mfspr r4, SPRN_IBAT6U + mfspr r5, SPRN_IBAT6L + mfspr r6, SPRN_IBAT7U + mfspr r7, SPRN_IBAT7L + + stw r4, SS_IBAT+0x30(r3) + stw r5, SS_IBAT+0x34(r3) + stw r6, SS_IBAT+0x38(r3) + stw r7, SS_IBAT+0x3c(r3) + + mfmsr r4 + mflr r5 + mfcr r6 + + stw r4, SS_MSR(r3) + stw r5, SS_LR(r3) + stw r6, SS_CR(r3) + stw r1, SS_SP(r3) + stw r2, SS_R2(r3) + +1: mftbu r4 + mftb r5 + mftbu r6 + cmpw r4, r6 + bne 1b + + stw r4, SS_TB+0(r3) + stw r5, SS_TB+4(r3) + + stmw r12, SS_GPREG(r3) + + li r4, 0 + addi r6, r3, SS_SR-4 +1: mfsrin r5, r4 + stwu r5, 4(r6) + addis r4, r4, 0x1000 + cmpwi r4, 0 + bne 1b + + /* Disable machine checks and critical exceptions */ + mfmsr r4 + rlwinm r4, r4, 0, ~MSR_CE + rlwinm r4, r4, 0, ~MSR_ME + mtmsr r4 + isync + +#define TMP_VIRT_IMMR 0xf0000000 +#define DEFAULT_IMMR_VALUE 0xff400000 +#define IMMRBAR_BASE 0x0000 + + lis r4, immrbase@ha + lwz r4, immrbase@l(r4) + + /* Use DBAT0 to address the current IMMR space */ + + ori r4, r4, 0x002a + mtspr SPRN_DBAT0L, r4 + lis r8, TMP_VIRT_IMMR@h + ori r4, r8, 0x001e /* 1 MByte accessible from Kernel Space only */ + mtspr SPRN_DBAT0U, r4 + isync + + /* Use DBAT1 to address the original IMMR space */ + + lis r4, DEFAULT_IMMR_VALUE@h + ori r4, r4, 0x002a + mtspr SPRN_DBAT1L, r4 + lis r9, (TMP_VIRT_IMMR + 0x01000000)@h + ori r4, r9, 0x001e /* 1 MByte accessible from Kernel Space only */ + mtspr SPRN_DBAT1U, r4 + isync + + /* Use DBAT2 to address the beginning of RAM. This isn't done + * using the normal virtual mapping, because with page debugging + * enabled it will be read-only. + */ + + li r4, 0x0002 + mtspr SPRN_DBAT2L, r4 + lis r4, KERNELBASE@h + ori r4, r4, 0x001e /* 1 MByte accessible from Kernel Space only */ + mtspr SPRN_DBAT2U, r4 + isync + + /* Flush the cache with our BAT, as there will be TLB misses + * otherwise if page debugging is enabled, and these misses + * will disturb the PLRU algorithm. + */ + + bl __flush_disable_L1 + + /* Keep the i-cache enabled, so the hack below for low-boot + * flash will work. + */ + mfspr r3, SPRN_HID0 + ori r3, r3, HID0_ICE + mtspr SPRN_HID0, r3 + isync + + lis r6, 0xf515 + ori r6, r6, 0x3ae5 + + lis r7, mpc83xx_deep_resume@h + ori r7, r7, mpc83xx_deep_resume@l + tophys(r7, r7) + + lis r5, KERNELBASE@h + stw r6, 0(r5) + stw r7, 4(r5) + + /* Reset BARs */ + + li r4, 0 + stw r4, 0x0024(r8) + stw r4, 0x002c(r8) + stw r4, 0x0034(r8) + stw r4, 0x003c(r8) + stw r4, 0x0064(r8) + stw r4, 0x006c(r8) + + /* Rev 1 of the 8313 has problems with wakeup events that are + * pending during the transition to deep sleep state (such as if + * the PCI host sets the state to D3 and then D0 in rapid + * succession). This check shrinks the race window somewhat. + * + * See erratum PCI23, though the problem is not limited + * to PCI. + */ + + lwz r3, 0x0b04(r8) + andi. r3, r3, 1 + bne- mpc83xx_deep_resume + + /* Move IMMR back to the default location, following the + * procedure specified in the MPC8313 manual. + */ + lwz r4, IMMRBAR_BASE(r8) + isync + lis r4, DEFAULT_IMMR_VALUE@h + stw r4, IMMRBAR_BASE(r8) + lis r4, KERNELBASE@h + lwz r4, 0(r4) + isync + lwz r4, IMMRBAR_BASE(r9) + mr r8, r9 + isync + + /* Check the Reset Configuration Word to see whether flash needs + * to be mapped at a low address or a high address. + */ + + lwz r4, 0x0904(r8) + andis. r4, r4, 0x0400 + li r4, 0 + beq boot_low + lis r4, 0xff80 +boot_low: + stw r4, 0x0020(r8) + lis r7, 0x8000 + ori r7, r7, 0x0016 + + mfspr r5, SPRN_HID0 + rlwinm r5, r5, 0, ~(HID0_DOZE | HID0_NAP) + oris r5, r5, HID0_SLEEP@h + mtspr SPRN_HID0, r5 + isync + + mfmsr r5 + oris r5, r5, MSR_POW@h + + /* Enable the flash mapping at the appropriate address. This + * mapping will override the RAM mapping if booting low, so there's + * no need to disable the latter. This must be done inside the same + * cache line as setting MSR_POW, so that no instruction fetches + * from RAM happen after the flash mapping is turned on. + */ + + .align 5 + stw r7, 0x0024(r8) + sync + isync + mtmsr r5 + isync +1: b 1b + +mpc83xx_deep_resume: + lis r4, 1f@h + ori r4, r4, 1f@l + tophys(r4, r4) + mtsrr0 r4 + + mfmsr r4 + rlwinm r4, r4, 0, ~(MSR_IR | MSR_DR) + mtsrr1 r4 + + rfi + +1: tlbia + bl __inval_enable_L1 + + lis r3, mpc83xx_sleep_save_area@h + ori r3, r3, mpc83xx_sleep_save_area@l + tophys(r3, r3) + + lwz r5, SS_MEMSAVE+0(r3) + lwz r6, SS_MEMSAVE+4(r3) + + stw r5, 0(0) + stw r6, 4(0) + + lwz r5, SS_HID+0(r3) + lwz r6, SS_HID+4(r3) + lwz r7, SS_HID+8(r3) + + mtspr SPRN_HID0, r5 + mtspr SPRN_HID1, r6 + mtspr SPRN_HID2, r7 + + lwz r4, SS_IABR+0(r3) + lwz r5, SS_IABR+4(r3) + lwz r6, SS_IBCR(r3) + lwz r7, SS_DABR+0(r3) + lwz r8, SS_DABR+4(r3) + lwz r9, SS_DBCR(r3) + + mtspr SPRN_IABR, r4 + mtspr SPRN_IABR2, r5 + mtspr SPRN_IBCR, r6 + mtspr SPRN_DABR, r7 + mtspr SPRN_DABR2, r8 + mtspr SPRN_DBCR, r9 + + li r4, 0 + addi r6, r3, SS_SR-4 +1: lwzu r5, 4(r6) + mtsrin r5, r4 + addis r4, r4, 0x1000 + cmpwi r4, 0 + bne 1b + + lwz r4, SS_DBAT+0x00(r3) + lwz r5, SS_DBAT+0x04(r3) + lwz r6, SS_DBAT+0x08(r3) + lwz r7, SS_DBAT+0x0c(r3) + + mtspr SPRN_DBAT0U, r4 + mtspr SPRN_DBAT0L, r5 + mtspr SPRN_DBAT1U, r6 + mtspr SPRN_DBAT1L, r7 + + lwz r4, SS_DBAT+0x10(r3) + lwz r5, SS_DBAT+0x14(r3) + lwz r6, SS_DBAT+0x18(r3) + lwz r7, SS_DBAT+0x1c(r3) + + mtspr SPRN_DBAT2U, r4 + mtspr SPRN_DBAT2L, r5 + mtspr SPRN_DBAT3U, r6 + mtspr SPRN_DBAT3L, r7 + + lwz r4, SS_DBAT+0x20(r3) + lwz r5, SS_DBAT+0x24(r3) + lwz r6, SS_DBAT+0x28(r3) + lwz r7, SS_DBAT+0x2c(r3) + + mtspr SPRN_DBAT4U, r4 + mtspr SPRN_DBAT4L, r5 + mtspr SPRN_DBAT5U, r6 + mtspr SPRN_DBAT5L, r7 + + lwz r4, SS_DBAT+0x30(r3) + lwz r5, SS_DBAT+0x34(r3) + lwz r6, SS_DBAT+0x38(r3) + lwz r7, SS_DBAT+0x3c(r3) + + mtspr SPRN_DBAT6U, r4 + mtspr SPRN_DBAT6L, r5 + mtspr SPRN_DBAT7U, r6 + mtspr SPRN_DBAT7L, r7 + + lwz r4, SS_IBAT+0x00(r3) + lwz r5, SS_IBAT+0x04(r3) + lwz r6, SS_IBAT+0x08(r3) + lwz r7, SS_IBAT+0x0c(r3) + + mtspr SPRN_IBAT0U, r4 + mtspr SPRN_IBAT0L, r5 + mtspr SPRN_IBAT1U, r6 + mtspr SPRN_IBAT1L, r7 + + lwz r4, SS_IBAT+0x10(r3) + lwz r5, SS_IBAT+0x14(r3) + lwz r6, SS_IBAT+0x18(r3) + lwz r7, SS_IBAT+0x1c(r3) + + mtspr SPRN_IBAT2U, r4 + mtspr SPRN_IBAT2L, r5 + mtspr SPRN_IBAT3U, r6 + mtspr SPRN_IBAT3L, r7 + + lwz r4, SS_IBAT+0x20(r3) + lwz r5, SS_IBAT+0x24(r3) + lwz r6, SS_IBAT+0x28(r3) + lwz r7, SS_IBAT+0x2c(r3) + + mtspr SPRN_IBAT4U, r4 + mtspr SPRN_IBAT4L, r5 + mtspr SPRN_IBAT5U, r6 + mtspr SPRN_IBAT5L, r7 + + lwz r4, SS_IBAT+0x30(r3) + lwz r5, SS_IBAT+0x34(r3) + lwz r6, SS_IBAT+0x38(r3) + lwz r7, SS_IBAT+0x3c(r3) + + mtspr SPRN_IBAT6U, r4 + mtspr SPRN_IBAT6L, r5 + mtspr SPRN_IBAT7U, r6 + mtspr SPRN_IBAT7L, r7 + + lwz r4, SS_SPRG+16(r3) + lwz r5, SS_SPRG+20(r3) + lwz r6, SS_SPRG+24(r3) + lwz r7, SS_SPRG+28(r3) + + mtspr SPRN_SPRG4, r4 + mtspr SPRN_SPRG5, r5 + mtspr SPRN_SPRG6, r6 + mtspr SPRN_SPRG7, r7 + + lwz r4, SS_SPRG+0(r3) + lwz r5, SS_SPRG+4(r3) + lwz r6, SS_SPRG+8(r3) + lwz r7, SS_SPRG+12(r3) + lwz r8, SS_SDR1(r3) + + mtspr SPRN_SPRG0, r4 + mtspr SPRN_SPRG1, r5 + mtspr SPRN_SPRG2, r6 + mtspr SPRN_SPRG3, r7 + mtsdr1 r8 + + lwz r4, SS_MSR(r3) + lwz r5, SS_LR(r3) + lwz r6, SS_CR(r3) + lwz r1, SS_SP(r3) + lwz r2, SS_R2(r3) + + mtsrr1 r4 + mtsrr0 r5 + mtcr r6 + + li r4, 0 + mtspr SPRN_TBWL, r4 + + lwz r4, SS_TB+0(r3) + lwz r5, SS_TB+4(r3) + + mtspr SPRN_TBWU, r4 + mtspr SPRN_TBWL, r5 + + lmw r12, SS_GPREG(r3) + + /* Kick decrementer */ + li r0, 1 + mtdec r0 + + rfi +_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume) diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c new file mode 100644 index 0000000000..9833c36bda --- /dev/null +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MPC83xx suspend support + * + * Author: Scott Wood + * + * Copyright (c) 2006-2007 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define PMCCR1_NEXT_STATE 0x0C /* Next state for power management */ +#define PMCCR1_NEXT_STATE_SHIFT 2 +#define PMCCR1_CURR_STATE 0x03 /* Current state for power management*/ +#define IMMR_SYSCR_OFFSET 0x100 +#define IMMR_RCW_OFFSET 0x900 +#define RCW_PCI_HOST 0x80000000 + +void mpc83xx_enter_deep_sleep(phys_addr_t immrbase); + +struct mpc83xx_pmc { + u32 config; +#define PMCCR_DLPEN 2 /* DDR SDRAM low power enable */ +#define PMCCR_SLPEN 1 /* System low power enable */ + + u32 event; + u32 mask; +/* All but PMCI are deep-sleep only */ +#define PMCER_GPIO 0x100 +#define PMCER_PCI 0x080 +#define PMCER_USB 0x040 +#define PMCER_ETSEC1 0x020 +#define PMCER_ETSEC2 0x010 +#define PMCER_TIMER 0x008 +#define PMCER_INT1 0x004 +#define PMCER_INT2 0x002 +#define PMCER_PMCI 0x001 +#define PMCER_ALL 0x1FF + + /* deep-sleep only */ + u32 config1; +#define PMCCR1_USE_STATE 0x80000000 +#define PMCCR1_PME_EN 0x00000080 +#define PMCCR1_ASSERT_PME 0x00000040 +#define PMCCR1_POWER_OFF 0x00000020 + + /* deep-sleep only */ + u32 config2; +}; + +struct mpc83xx_rcw { + u32 rcwlr; + u32 rcwhr; +}; + +struct mpc83xx_clock { + u32 spmr; + u32 occr; + u32 sccr; +}; + +struct mpc83xx_syscr { + __be32 sgprl; + __be32 sgprh; + __be32 spridr; + __be32 :32; + __be32 spcr; + __be32 sicrl; + __be32 sicrh; +}; + +struct mpc83xx_saved { + u32 sicrl; + u32 sicrh; + u32 sccr; +}; + +struct pmc_type { + int has_deep_sleep; +}; + +static int has_deep_sleep, deep_sleeping; +static int pmc_irq; +static struct mpc83xx_pmc __iomem *pmc_regs; +static struct mpc83xx_clock __iomem *clock_regs; +static struct mpc83xx_syscr __iomem *syscr_regs; +static struct mpc83xx_saved saved_regs; +static int is_pci_agent, wake_from_pci; +static phys_addr_t immrbase; +static int pci_pm_state; +static DECLARE_WAIT_QUEUE_HEAD(agent_wq); + +int fsl_deep_sleep(void) +{ + return deep_sleeping; +} +EXPORT_SYMBOL(fsl_deep_sleep); + +static int mpc83xx_change_state(void) +{ + u32 curr_state; + u32 reg_cfg1 = in_be32(&pmc_regs->config1); + + if (is_pci_agent) { + pci_pm_state = (reg_cfg1 & PMCCR1_NEXT_STATE) >> + PMCCR1_NEXT_STATE_SHIFT; + curr_state = reg_cfg1 & PMCCR1_CURR_STATE; + + if (curr_state != pci_pm_state) { + reg_cfg1 &= ~PMCCR1_CURR_STATE; + reg_cfg1 |= pci_pm_state; + out_be32(&pmc_regs->config1, reg_cfg1); + + wake_up(&agent_wq); + return 1; + } + } + + return 0; +} + +static irqreturn_t pmc_irq_handler(int irq, void *dev_id) +{ + u32 event = in_be32(&pmc_regs->event); + int ret = IRQ_NONE; + + if (mpc83xx_change_state()) + ret = IRQ_HANDLED; + + if (event) { + out_be32(&pmc_regs->event, event); + ret = IRQ_HANDLED; + } + + return ret; +} + +static void mpc83xx_suspend_restore_regs(void) +{ + out_be32(&syscr_regs->sicrl, saved_regs.sicrl); + out_be32(&syscr_regs->sicrh, saved_regs.sicrh); + out_be32(&clock_regs->sccr, saved_regs.sccr); +} + +static void mpc83xx_suspend_save_regs(void) +{ + saved_regs.sicrl = in_be32(&syscr_regs->sicrl); + saved_regs.sicrh = in_be32(&syscr_regs->sicrh); + saved_regs.sccr = in_be32(&clock_regs->sccr); +} + +static int mpc83xx_suspend_enter(suspend_state_t state) +{ + int ret = -EAGAIN; + + /* Don't go to sleep if there's a race where pci_pm_state changes + * between the agent thread checking it and the PM code disabling + * interrupts. + */ + if (wake_from_pci) { + if (pci_pm_state != (deep_sleeping ? 3 : 2)) + goto out; + + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) | PMCCR1_PME_EN); + } + + /* Put the system into low-power mode and the RAM + * into self-refresh mode once the core goes to + * sleep. + */ + + out_be32(&pmc_regs->config, PMCCR_SLPEN | PMCCR_DLPEN); + + /* If it has deep sleep (i.e. it's an 831x or compatible), + * disable power to the core upon entering sleep mode. This will + * require going through the boot firmware upon a wakeup event. + */ + + if (deep_sleeping) { + mpc83xx_suspend_save_regs(); + + out_be32(&pmc_regs->mask, PMCER_ALL); + + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) | PMCCR1_POWER_OFF); + + enable_kernel_fp(); + + mpc83xx_enter_deep_sleep(immrbase); + + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) & ~PMCCR1_POWER_OFF); + + out_be32(&pmc_regs->mask, PMCER_PMCI); + + mpc83xx_suspend_restore_regs(); + } else { + out_be32(&pmc_regs->mask, PMCER_PMCI); + + mpc6xx_enter_standby(); + } + + ret = 0; + +out: + out_be32(&pmc_regs->config1, + in_be32(&pmc_regs->config1) & ~PMCCR1_PME_EN); + + return ret; +} + +static void mpc83xx_suspend_end(void) +{ + deep_sleeping = 0; +} + +static int mpc83xx_suspend_valid(suspend_state_t state) +{ + return state == PM_SUSPEND_STANDBY || state == PM_SUSPEND_MEM; +} + +static int mpc83xx_suspend_begin(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_STANDBY: + deep_sleeping = 0; + return 0; + + case PM_SUSPEND_MEM: + if (has_deep_sleep) + deep_sleeping = 1; + + return 0; + + default: + return -EINVAL; + } +} + +static int agent_thread_fn(void *data) +{ + while (1) { + wait_event_interruptible(agent_wq, pci_pm_state >= 2); + try_to_freeze(); + + if (signal_pending(current) || pci_pm_state < 2) + continue; + + /* With a preemptible kernel (or SMP), this could race with + * a userspace-driven suspend request. It's probably best + * to avoid mixing the two with such a configuration (or + * else fix it by adding a mutex to state_store that we can + * synchronize with). + */ + + wake_from_pci = 1; + + pm_suspend(pci_pm_state == 3 ? PM_SUSPEND_MEM : + PM_SUSPEND_STANDBY); + + wake_from_pci = 0; + } + + return 0; +} + +static void mpc83xx_set_agent(void) +{ + out_be32(&pmc_regs->config1, PMCCR1_USE_STATE); + out_be32(&pmc_regs->mask, PMCER_PMCI); + + kthread_run(agent_thread_fn, NULL, "PCI power mgt"); +} + +static int mpc83xx_is_pci_agent(void) +{ + struct mpc83xx_rcw __iomem *rcw_regs; + int ret; + + rcw_regs = ioremap(get_immrbase() + IMMR_RCW_OFFSET, + sizeof(struct mpc83xx_rcw)); + + if (!rcw_regs) + return -ENOMEM; + + ret = !(in_be32(&rcw_regs->rcwhr) & RCW_PCI_HOST); + + iounmap(rcw_regs); + return ret; +} + +static const struct platform_suspend_ops mpc83xx_suspend_ops = { + .valid = mpc83xx_suspend_valid, + .begin = mpc83xx_suspend_begin, + .enter = mpc83xx_suspend_enter, + .end = mpc83xx_suspend_end, +}; + +static struct pmc_type pmc_types[] = { + { + .has_deep_sleep = 1, + }, + { + .has_deep_sleep = 0, + } +}; + +static const struct of_device_id pmc_match[] = { + { + .compatible = "fsl,mpc8313-pmc", + .data = &pmc_types[0], + }, + { + .compatible = "fsl,mpc8349-pmc", + .data = &pmc_types[1], + }, + {} +}; + +static int pmc_probe(struct platform_device *ofdev) +{ + struct device_node *np = ofdev->dev.of_node; + struct resource res; + const struct pmc_type *type; + int ret = 0; + + type = of_device_get_match_data(&ofdev->dev); + if (!type) + return -EINVAL; + + if (!of_device_is_available(np)) + return -ENODEV; + + has_deep_sleep = type->has_deep_sleep; + immrbase = get_immrbase(); + + is_pci_agent = mpc83xx_is_pci_agent(); + if (is_pci_agent < 0) + return is_pci_agent; + + ret = of_address_to_resource(np, 0, &res); + if (ret) + return -ENODEV; + + pmc_irq = irq_of_parse_and_map(np, 0); + if (pmc_irq) { + ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED, + "pmc", ofdev); + + if (ret) + return -EBUSY; + } + + pmc_regs = ioremap(res.start, sizeof(*pmc_regs)); + + if (!pmc_regs) { + ret = -ENOMEM; + goto out; + } + + ret = of_address_to_resource(np, 1, &res); + if (ret) { + ret = -ENODEV; + goto out_pmc; + } + + clock_regs = ioremap(res.start, sizeof(*clock_regs)); + + if (!clock_regs) { + ret = -ENOMEM; + goto out_pmc; + } + + if (has_deep_sleep) { + syscr_regs = ioremap(immrbase + IMMR_SYSCR_OFFSET, + sizeof(*syscr_regs)); + if (!syscr_regs) { + ret = -ENOMEM; + goto out_syscr; + } + } + + if (is_pci_agent) + mpc83xx_set_agent(); + + suspend_set_ops(&mpc83xx_suspend_ops); + return 0; + +out_syscr: + iounmap(clock_regs); +out_pmc: + iounmap(pmc_regs); +out: + if (pmc_irq) + free_irq(pmc_irq, ofdev); + + return ret; +} + +static struct platform_driver pmc_driver = { + .driver = { + .name = "mpc83xx-pmc", + .of_match_table = pmc_match, + .suppress_bind_attrs = true, + }, + .probe = pmc_probe, +}; + +builtin_platform_driver(pmc_driver); diff --git a/arch/powerpc/platforms/83xx/usb_831x.c b/arch/powerpc/platforms/83xx/usb_831x.c new file mode 100644 index 0000000000..28c24e90f0 --- /dev/null +++ b/arch/powerpc/platforms/83xx/usb_831x.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale 83xx USB SOC setup code + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. + * Author: Li Yang + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc83xx.h" + +int __init mpc831x_usb_cfg(void) +{ + u32 temp; + void __iomem *immap, *usb_regs; + struct device_node *np = NULL; + struct device_node *immr_node = NULL; + const void *prop; + struct resource res; + int ret = 0; +#ifdef CONFIG_USB_OTG + const void *dr_mode; +#endif + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (!np) + return -ENODEV; + prop = of_get_property(np, "phy_type", NULL); + + /* Map IMMR space for pin and clock settings */ + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) { + of_node_put(np); + return -ENOMEM; + } + + /* Configure clock */ + immr_node = of_get_parent(np); + if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") || + of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))) + clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, + MPC8315_SCCR_USB_MASK, + MPC8315_SCCR_USB_DRCM_01); + else + clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, + MPC83XX_SCCR_USB_MASK, + MPC83XX_SCCR_USB_DRCM_11); + + /* Configure pin mux for ULPI. There is no pin mux for UTMI */ + if (prop && !strcmp(prop, "ulpi")) { + if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) { + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC8308_SICRH_USB_MASK, + MPC8308_SICRH_USB_ULPI); + } else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) { + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, + MPC8315_SICRL_USB_MASK, + MPC8315_SICRL_USB_ULPI); + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC8315_SICRH_USB_MASK, + MPC8315_SICRH_USB_ULPI); + } else { + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, + MPC831X_SICRL_USB_MASK, + MPC831X_SICRL_USB_ULPI); + clrsetbits_be32(immap + MPC83XX_SICRH_OFFS, + MPC831X_SICRH_USB_MASK, + MPC831X_SICRH_USB_ULPI); + } + } + + iounmap(immap); + + of_node_put(immr_node); + + /* Map USB SOC space */ + ret = of_address_to_resource(np, 0, &res); + if (ret) { + of_node_put(np); + return ret; + } + usb_regs = ioremap(res.start, resource_size(&res)); + + /* Using on-chip PHY */ + if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) { + u32 refsel; + + if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) + goto out; + + if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) + refsel = CONTROL_REFSEL_24MHZ; + else + refsel = CONTROL_REFSEL_48MHZ; + /* Set UTMI_PHY_EN and REFSEL */ + out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, + CONTROL_UTMI_PHY_EN | refsel); + /* Using external UPLI PHY */ + } else if (prop && !strcmp(prop, "ulpi")) { + /* Set PHY_CLK_SEL to ULPI */ + temp = CONTROL_PHY_CLK_SEL_ULPI; +#ifdef CONFIG_USB_OTG + /* Set OTG_PORT */ + if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) { + dr_mode = of_get_property(np, "dr_mode", NULL); + if (dr_mode && !strcmp(dr_mode, "otg")) + temp |= CONTROL_OTG_PORT; + } +#endif /* CONFIG_USB_OTG */ + out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp); + } else { + pr_warn("831x USB PHY type not supported\n"); + ret = -EINVAL; + } + +out: + iounmap(usb_regs); + of_node_put(np); + return ret; +} diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c new file mode 100644 index 0000000000..3a8d6c662d --- /dev/null +++ b/arch/powerpc/platforms/83xx/usb_834x.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale 83xx USB SOC setup code + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. + * Author: Li Yang + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc83xx.h" + +int __init mpc834x_usb_cfg(void) +{ + unsigned long sccr, sicrl, sicrh; + void __iomem *immap; + struct device_node *np = NULL; + int port0_is_dr = 0, port1_is_dr = 0; + const void *prop, *dr_mode; + + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) + return -ENOMEM; + + /* Read registers */ + /* Note: DR and MPH must use the same clock setting in SCCR */ + sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK; + sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK; + sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI; + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (np) { + sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */ + + prop = of_get_property(np, "phy_type", NULL); + port1_is_dr = 1; + if (prop && + (!strcmp(prop, "utmi") || !strcmp(prop, "utmi_wide"))) { + sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; + sicrh |= MPC834X_SICRH_USB_UTMI; + port0_is_dr = 1; + } else if (prop && !strcmp(prop, "serial")) { + dr_mode = of_get_property(np, "dr_mode", NULL); + if (dr_mode && !strcmp(dr_mode, "otg")) { + sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; + port0_is_dr = 1; + } else { + sicrl |= MPC834X_SICRL_USB1; + } + } else if (prop && !strcmp(prop, "ulpi")) { + sicrl |= MPC834X_SICRL_USB1; + } else { + pr_warn("834x USB PHY type not supported\n"); + } + of_node_put(np); + } + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph"); + if (np) { + sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */ + + prop = of_get_property(np, "port0", NULL); + if (prop) { + if (port0_is_dr) + pr_warn("834x USB port0 can't be used by both DR and MPH!\n"); + sicrl &= ~MPC834X_SICRL_USB0; + } + prop = of_get_property(np, "port1", NULL); + if (prop) { + if (port1_is_dr) + pr_warn("834x USB port1 can't be used by both DR and MPH!\n"); + sicrl &= ~MPC834X_SICRL_USB1; + } + of_node_put(np); + } + + /* Write back */ + out_be32(immap + MPC83XX_SCCR_OFFS, sccr); + out_be32(immap + MPC83XX_SICRL_OFFS, sicrl); + out_be32(immap + MPC83XX_SICRH_OFFS, sicrh); + + iounmap(immap); + return 0; +} diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c new file mode 100644 index 0000000000..726935bb6e --- /dev/null +++ b/arch/powerpc/platforms/83xx/usb_837x.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale 83xx USB SOC setup code + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. + * Author: Li Yang + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc83xx.h" + +int __init mpc837x_usb_cfg(void) +{ + void __iomem *immap; + struct device_node *np = NULL; + const void *prop; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (!np || !of_device_is_available(np)) { + of_node_put(np); + return -ENODEV; + } + prop = of_get_property(np, "phy_type", NULL); + + if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { + pr_warn("837x USB PHY type not supported\n"); + of_node_put(np); + return -EINVAL; + } + + /* Map IMMR space for pin and clock settings */ + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) { + of_node_put(np); + return -ENOMEM; + } + + /* Configure clock */ + clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11, + MPC837X_SCCR_USB_DRCM_11); + + /* Configure pin mux for ULPI/serial */ + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK, + MPC837X_SICRL_USB_ULPI); + + iounmap(immap); + of_node_put(np); + return ret; +} diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig new file mode 100644 index 0000000000..9315a3b69d --- /dev/null +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -0,0 +1,291 @@ +# SPDX-License-Identifier: GPL-2.0 +menuconfig FSL_SOC_BOOKE + bool "Freescale Book-E Machine Type" + depends on PPC_E500 + select FSL_SOC + select PPC_UDBG_16550 + select MPIC + select HAVE_PCI + select FSL_PCI if PCI + select SERIAL_8250_EXTENDED if SERIAL_8250 + select SERIAL_8250_SHARE_IRQ if SERIAL_8250 + select FSL_CORENET_RCPM if PPC_E500MC + default y + +if FSL_SOC_BOOKE + +if PPC32 + +config BSC9131_RDB + bool "Freescale BSC9131RDB" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale BSC9131RDB board. + The BSC9131 is a heterogeneous SoC containing an e500v2 powerpc and a + StarCore SC3850 DSP + Manufacturer : Freescale Semiconductor, Inc + +config C293_PCIE + bool "Freescale C293PCIE" + select DEFAULT_UIMAGE + help + This option enables support for the C293PCIE board + +config BSC9132_QDS + bool "Freescale BSC9132QDS" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale BSC9132 QDS board. + BSC9132 is a heterogeneous SoC containing dual e500v2 powerpc cores + and dual StarCore SC3850 DSP cores. + Manufacturer : Freescale Semiconductor, Inc + +config MPC8540_ADS + bool "Freescale MPC8540 ADS" + select DEFAULT_UIMAGE + help + This option enables support for the MPC 8540 ADS board + +config MPC8560_ADS + bool "Freescale MPC8560 ADS" + select DEFAULT_UIMAGE + select CPM2 + help + This option enables support for the MPC 8560 ADS board + +config MPC85xx_CDS + bool "Freescale MPC85xx CDS" + select DEFAULT_UIMAGE + select PPC_I8259 + select HAVE_RAPIDIO + help + This option enables support for the MPC85xx CDS board + +config MPC85xx_MDS + bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS" + select DEFAULT_UIMAGE + select PHYLIB if NETDEVICES + select HAVE_RAPIDIO + select SWIOTLB + help + This option enables support for the MPC8568 MDS, MPC8569 MDS and P1021 MDS boards + +config MPC8536_DS + bool "Freescale MPC8536 DS" + select DEFAULT_UIMAGE + select SWIOTLB + help + This option enables support for the MPC8536 DS board + +config MPC85xx_DS + bool "Freescale MPC8544 DS / MPC8572 DS" + select PPC_I8259 + select DEFAULT_UIMAGE + select FSL_ULI1575 if PCI + select SWIOTLB + help + This option enables support for the MPC8544 DS and MPC8572 DS boards + +config MPC85xx_RDB + bool "Freescale P102x MBG/UTM/RDB" + select PPC_I8259 + select DEFAULT_UIMAGE + select SWIOTLB + help + This option enables support for the P1020 MBG PC, P1020 UTM PC, + P1020 RDB PC, P1020 RDB PD, P1020 RDB, P1021 RDB PC, P1024 RDB, + and P1025 RDB boards + +config PPC_P2020 + bool "Freescale P2020" + default y if MPC85xx_DS || MPC85xx_RDB + select DEFAULT_UIMAGE + select SWIOTLB + imply PPC_I8259 + imply FSL_ULI1575 if PCI + help + This option enables generic unified support for any board with the + Freescale P2020 processor. + + For example: P2020 DS board, P2020 RDB board, P2020 RDB PC board or + CZ.NIC Turris 1.x boards. + +config P1010_RDB + bool "Freescale P1010 RDB" + select DEFAULT_UIMAGE + help + This option enables support for the P1010 RDB board + + P1010RDB contains P1010Si, which provides CPU performance up to 800 + MHz and 1600 DMIPS, additional functionality and faster interfaces + (DDR3/3L, SATA II, and PCI Express). + +config P1022_DS + bool "Freescale P1022 DS" + select DEFAULT_UIMAGE + select SWIOTLB + help + This option enables support for the Freescale P1022DS reference board. + +config P1022_RDK + bool "Freescale / iVeia P1022 RDK" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale / iVeia P1022RDK + reference board. + +config P1023_RDB + bool "Freescale P1023 RDB" + select DEFAULT_UIMAGE + help + This option enables support for the P1023 RDB board. + +config TWR_P102x + bool "Freescale TWR-P102x" + select DEFAULT_UIMAGE + help + This option enables support for the TWR-P1025 board. + +config SOCRATES + bool "Socrates" + select DEFAULT_UIMAGE + help + This option enables support for the Socrates board. + +config KSI8560 + bool "Emerson KSI8560" + select DEFAULT_UIMAGE + help + This option enables support for the Emerson KSI8560 board + +config XES_MPC85xx + bool "X-ES single-board computer" + select DEFAULT_UIMAGE + help + This option enables support for the various single-board + computers from Extreme Engineering Solutions (X-ES) based on + Freescale MPC85xx processors. + Manufacturer: Extreme Engineering Solutions, Inc. + URL: + +config STX_GP3 + bool "Silicon Turnkey Express GP3" + help + This option enables support for the Silicon Turnkey Express GP3 + board. + select CPM2 + select DEFAULT_UIMAGE + +config TQM8540 + bool "TQ Components TQM8540" + help + This option enables support for the TQ Components TQM8540 board. + select DEFAULT_UIMAGE + select TQM85xx + +config TQM8541 + bool "TQ Components TQM8541" + help + This option enables support for the TQ Components TQM8541 board. + select DEFAULT_UIMAGE + select TQM85xx + select CPM2 + +config TQM8548 + bool "TQ Components TQM8548" + help + This option enables support for the TQ Components TQM8548 board. + select DEFAULT_UIMAGE + select TQM85xx + +config TQM8555 + bool "TQ Components TQM8555" + help + This option enables support for the TQ Components TQM8555 board. + select DEFAULT_UIMAGE + select TQM85xx + select CPM2 + +config TQM8560 + bool "TQ Components TQM8560" + help + This option enables support for the TQ Components TQM8560 board. + select DEFAULT_UIMAGE + select TQM85xx + select CPM2 + +config PPA8548 + bool "Prodrive PPA8548" + help + This option enables support for the Prodrive PPA8548 board. + select DEFAULT_UIMAGE + select HAVE_RAPIDIO + +config GE_IMP3A + bool "GE Intelligent Platforms IMP3A" + select DEFAULT_UIMAGE + select SWIOTLB + select MMIO_NVRAM + select GPIOLIB + select GE_FPGA + help + This option enables support for the GE Intelligent Platforms IMP3A + board. + + This board is a 3U CompactPCI Single Board Computer with a Freescale + P2020 processor. + +config SGY_CTS1000 + tristate "Servergy CTS-1000 support" + select GPIOLIB + select OF_GPIO + depends on CORENET_GENERIC + help + Enable this to support functionality in Servergy's CTS-1000 systems. + +config MVME2500 + bool "Artesyn MVME2500" + select DEFAULT_UIMAGE + help + This option enables support for the Emerson/Artesyn MVME2500 board. + +endif # PPC32 + +config PPC_QEMU_E500 + bool "QEMU generic e500 platform" + select DEFAULT_UIMAGE + help + This option enables support for running as a QEMU guest using + QEMU's generic e500 machine. This is not required if you're + using a QEMU machine that targets a specific board, such as + mpc8544ds. + + Unlike most e500 boards that target a specific CPU, this + platform works with any e500-family CPU that QEMU supports. + Thus, you'll need to make sure CONFIG_PPC_E500MC is set or + unset based on the emulated CPU (or actual host CPU in the case + of KVM). + +config CORENET_GENERIC + bool "Freescale CoreNet Generic" + select DEFAULT_UIMAGE + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select GPIOLIB + select GPIO_MPC8XXX + select HAVE_RAPIDIO + select PPC_EPAPR_HV_PIC + help + This option enables support for the FSL CoreNet based boards. + For 32bit kernel, the following boards are supported: + P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080 + For 64bit kernel, the following boards are supported: + T208x QDS/RDB, T4240 QDS/RDB and B4 QDS + The following boards are supported for both 32bit and 64bit kernel: + P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB + +endif # FSL_SOC_BOOKE + +config TQM85xx + bool diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile new file mode 100644 index 0000000000..43c34f26f1 --- /dev/null +++ b/arch/powerpc/platforms/85xx/Makefile @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PowerPC 85xx linux kernel. +# +obj-$(CONFIG_SMP) += smp.o +ifneq ($(CONFIG_FSL_CORENET_RCPM),y) +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o +endif + +obj-y += common.o + +obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o +obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o +obj-$(CONFIG_C293_PCIE) += c293pcie.o +obj-$(CONFIG_MPC8536_DS) += mpc8536_ds.o +obj8259-$(CONFIG_PPC_I8259) += mpc85xx_8259.o +obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o $(obj8259-y) +obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o +obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o +obj-$(CONFIG_P1010_RDB) += p1010rdb.o +obj-$(CONFIG_P1022_DS) += p1022_ds.o +obj-$(CONFIG_P1022_RDK) += p1022_rdk.o +obj-$(CONFIG_P1023_RDB) += p1023_rdb.o +obj-$(CONFIG_PPC_P2020) += p2020.o $(obj8259-y) +obj-$(CONFIG_TWR_P102x) += twr_p102x.o +obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o +obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o +obj-$(CONFIG_STX_GP3) += stx_gp3.o +obj-$(CONFIG_TQM85xx) += tqm85xx.o +obj-$(CONFIG_PPA8548) += ppa8548.o +obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o +obj-$(CONFIG_KSI8560) += ksi8560.o +obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o +obj-$(CONFIG_GE_IMP3A) += ge_imp3a.o +obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o +obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o +obj-$(CONFIG_MVME2500) += mvme2500.o diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c new file mode 100644 index 0000000000..2eb62bff86 --- /dev/null +++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BSC913xQDS Board Setup + * + * Author: + * Harninder Rai + * Priyanka Jain + * + * Copyright 2014 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include "mpc85xx.h" +#include "smp.h" + +void __init bsc913x_qds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + if (!mpic) + pr_err("bsc913x: Failed to allocate MPIC structure\n"); + else + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init bsc913x_qds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("bsc913x_qds_setup_arch()", 0); + +#if defined(CONFIG_SMP) + mpc85xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + pr_info("bsc913x board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices); + +define_machine(bsc9132_qds) { + .name = "BSC9132 QDS", + .compatible = "fsl,bsc9132qds", + .setup_arch = bsc913x_qds_setup_arch, + .init_IRQ = bsc913x_qds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c new file mode 100644 index 0000000000..161f006cb3 --- /dev/null +++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BSC913xRDB Board Setup + * + * Author: Priyanka Jain + * + * Copyright 2011-2012 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include + +#include "mpc85xx.h" + +void __init bsc913x_rdb_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + if (!mpic) + pr_err("bsc913x: Failed to allocate MPIC structure\n"); + else + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init bsc913x_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("bsc913x_rdb_setup_arch()", 0); + + pr_info("bsc913x board from Freescale Semiconductor\n"); +} + +machine_device_initcall(bsc9131_rdb, mpc85xx_common_publish_devices); + +define_machine(bsc9131_rdb) { + .name = "BSC9131 RDB", + .compatible = "fsl,bsc9131rdb", + .setup_arch = bsc913x_rdb_setup_arch, + .init_IRQ = bsc913x_rdb_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c new file mode 100644 index 0000000000..7a63a3ad5e --- /dev/null +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * C293PCIE Board Setup + * + * Copyright 2013 Freescale Semiconductor Inc. + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" + +static void __init c293_pcie_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + + +/* + * Setup the architecture + */ +static void __init c293_pcie_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("c293_pcie_setup_arch()", 0); + + fsl_pci_assign_primary(); + + printk(KERN_INFO "C293 PCIE board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices); + +define_machine(c293_pcie) { + .name = "C293 PCIE", + .compatible = "fsl,C293PCIE", + .setup_arch = c293_pcie_setup_arch, + .init_IRQ = c293_pcie_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c new file mode 100644 index 0000000000..7578111555 --- /dev/null +++ b/arch/powerpc/platforms/85xx/common.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Routines common to most mpc85xx-based boards. + */ + +#include +#include +#include + +#include +#include +#include + +#include "mpc85xx.h" + +const struct fsl_pm_ops *qoriq_pm_ops; + +static const struct of_device_id mpc85xx_common_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .compatible = "simple-bus", }, + { .name = "cpm", }, + { .name = "localbus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,qe", }, + { .compatible = "fsl,cpm2", }, + { .compatible = "fsl,srio", }, + /* So that the DMA channel nodes can be probed individually: */ + { .compatible = "fsl,eloplus-dma", }, + /* For the PMC driver */ + { .compatible = "fsl,mpc8548-guts", }, + /* Probably unnecessary? */ + { .compatible = "gpio-leds", }, + /* For all PCI controllers */ + { .compatible = "fsl,mpc8540-pci", }, + { .compatible = "fsl,mpc8548-pcie", }, + { .compatible = "fsl,p1022-pcie", }, + { .compatible = "fsl,p1010-pcie", }, + { .compatible = "fsl,p1023-pcie", }, + { .compatible = "fsl,p4080-pcie", }, + { .compatible = "fsl,qoriq-pcie-v2.4", }, + { .compatible = "fsl,qoriq-pcie-v2.3", }, + { .compatible = "fsl,qoriq-pcie-v2.2", }, + { .compatible = "fsl,fman", }, + {}, +}; + +int __init mpc85xx_common_publish_devices(void) +{ + return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL); +} +#ifdef CONFIG_CPM2 +static void cpm2_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + int cascade_irq; + + while ((cascade_irq = cpm2_get_irq()) >= 0) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + + +void __init mpc85xx_cpm2_pic_init(void) +{ + struct device_node *np; + int irq; + + /* Setup CPM2 PIC */ + np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic"); + if (np == NULL) { + printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n"); + return; + } + irq = irq_of_parse_and_map(np, 0); + if (!irq) { + of_node_put(np); + printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n"); + return; + } + + cpm2_pic_init(np); + of_node_put(np); + irq_set_chained_handler(irq, cpm2_cascade); +} +#endif + +#ifdef CONFIG_QUICC_ENGINE +void __init mpc85xx_qe_par_io_init(void) +{ + struct device_node *np; + + np = of_find_node_by_name(NULL, "par_io"); + if (np) { + struct device_node *ucc; + + par_io_init(np); + of_node_put(np); + + for_each_node_by_name(ucc, "ucc") + par_io_of_config(ucc); + + } +} +#endif diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c new file mode 100644 index 0000000000..645fcca77c --- /dev/null +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Corenet based SoC DS Setup + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2009-2011 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "smp.h" +#include "mpc85xx.h" + +static void __init corenet_gen_pic_init(void) +{ + struct mpic *mpic; + unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | + MPIC_NO_RESET; + + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE)) + flags |= MPIC_ENABLE_COREINT; + + mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC "); + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init corenet_gen_setup_arch(void) +{ + mpc85xx_smp_init(); + + swiotlb_detect_4g(); + + pr_info("%s board\n", ppc_md.name); +} + +static const struct of_device_id of_device_ids[] = { + { + .compatible = "simple-bus" + }, + { + .compatible = "mdio-mux-gpio" + }, + { + .compatible = "fsl,fpga-ngpixis" + }, + { + .compatible = "fsl,fpga-qixis" + }, + { + .compatible = "fsl,srio", + }, + { + .compatible = "fsl,p4080-pcie", + }, + { + .compatible = "fsl,qoriq-pcie-v2.2", + }, + { + .compatible = "fsl,qoriq-pcie-v2.3", + }, + { + .compatible = "fsl,qoriq-pcie-v2.4", + }, + { + .compatible = "fsl,qoriq-pcie-v3.0", + }, + { + .compatible = "fsl,qe", + }, + /* The following two are for the Freescale hypervisor */ + { + .name = "hypervisor", + }, + { + .name = "handles", + }, + {} +}; + +static int __init corenet_gen_publish_devices(void) +{ + return of_platform_bus_probe(NULL, of_device_ids, NULL); +} +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); + +static const char * const boards[] __initconst = { + "fsl,P2041RDB", + "fsl,P3041DS", + "fsl,OCA4080", + "fsl,P4080DS", + "fsl,P5020DS", + "fsl,P5040DS", + "fsl,T2080QDS", + "fsl,T2080RDB", + "fsl,T2081QDS", + "fsl,T4240QDS", + "fsl,T4240RDB", + "fsl,B4860QDS", + "fsl,B4420QDS", + "fsl,B4220QDS", + "fsl,T1023RDB", + "fsl,T1024QDS", + "fsl,T1024RDB", + "fsl,T1040D4RDB", + "fsl,T1042D4RDB", + "fsl,T1040QDS", + "fsl,T1042QDS", + "fsl,T1040RDB", + "fsl,T1042RDB", + "fsl,T1042RDB_PI", + "keymile,kmcent2", + "keymile,kmcoge4", + "varisys,CYRUS", + NULL +}; + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init corenet_generic_probe(void) +{ + char hv_compat[24]; + int i; +#ifdef CONFIG_SMP + extern struct smp_ops_t smp_85xx_ops; +#endif + + if (of_device_compatible_match(of_root, boards)) + return 1; + + /* Check if we're running under the Freescale hypervisor */ + for (i = 0; boards[i]; i++) { + snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]); + if (of_machine_is_compatible(hv_compat)) { + ppc_md.init_IRQ = ehv_pic_init; + + ppc_md.get_irq = ehv_pic_get_irq; + ppc_md.restart = fsl_hv_restart; + pm_power_off = fsl_hv_halt; + ppc_md.halt = fsl_hv_halt; +#ifdef CONFIG_SMP + /* + * Disable the timebase sync operations because we + * can't write to the timebase registers under the + * hypervisor. + */ + smp_85xx_ops.give_timebase = NULL; + smp_85xx_ops.take_timebase = NULL; +#endif + return 1; + } + } + + return 0; +} + +define_machine(corenet_generic) { + .name = "CoreNet Generic", + .probe = corenet_generic_probe, + .setup_arch = corenet_gen_setup_arch, + .init_IRQ = corenet_gen_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +/* + * Core reset may cause issues if using the proxy mode of MPIC. + * So, use the mixed mode of MPIC if enabling CPU hotplug. + * + * Likewise, problems have been seen with kexec when coreint is enabled. + */ +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) + .get_irq = mpic_get_irq, +#else + .get_irq = mpic_get_coreint_irq, +#endif + .progress = udbg_progress, + .power_save = e500_idle, +}; diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c new file mode 100644 index 0000000000..9c3b44a195 --- /dev/null +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GE IMP3A Board Setup + * + * Author Martyn Welch + * + * Copyright 2010 GE Intelligent Platforms Embedded Systems, Inc. + * + * Based on: mpc85xx_ds.c (MPC85xx DS Board Setup) + * Copyright 2007 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "smp.h" + +#include "mpc85xx.h" +#include + +void __iomem *imp3a_regs; + +void __init ge_imp3a_pic_init(void) +{ + struct mpic *mpic; + struct device_node *np; + struct device_node *cascade_node = NULL; + + if (of_machine_is_compatible("fsl,MPC8572DS-CAMP")) { + mpic = mpic_alloc(NULL, 0, + MPIC_NO_RESET | + MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + } else { + mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + } + + BUG_ON(mpic == NULL); + mpic_init(mpic); + /* + * There is a simple interrupt handler in the main FPGA, this needs + * to be cascaded into the MPIC + */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "gef,fpga-pic-1.00")) { + cascade_node = np; + break; + } + + if (cascade_node == NULL) { + printk(KERN_WARNING "IMP3A: No FPGA PIC\n"); + return; + } + + gef_pic_init(cascade_node); + of_node_put(cascade_node); +} + +static void __init ge_imp3a_pci_assign_primary(void) +{ +#ifdef CONFIG_PCI + struct device_node *np; + struct resource rsrc; + + for_each_node_by_type(np, "pci") { + if (of_device_is_compatible(np, "fsl,mpc8540-pci") || + of_device_is_compatible(np, "fsl,mpc8548-pcie") || + of_device_is_compatible(np, "fsl,p2020-pcie")) { + of_address_to_resource(np, 0, &rsrc); + if ((rsrc.start & 0xfffff) == 0x9000) { + of_node_put(fsl_pci_primary); + fsl_pci_primary = of_node_get(np); + } + } + } +#endif +} + +/* + * Setup the architecture + */ +static void __init ge_imp3a_setup_arch(void) +{ + struct device_node *regs; + + if (ppc_md.progress) + ppc_md.progress("ge_imp3a_setup_arch()", 0); + + mpc85xx_smp_init(); + + ge_imp3a_pci_assign_primary(); + + swiotlb_detect_4g(); + + /* Remap basic board registers */ + regs = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs"); + if (regs) { + imp3a_regs = of_iomap(regs, 0); + if (imp3a_regs == NULL) + printk(KERN_WARNING "Unable to map board registers\n"); + of_node_put(regs); + } + +#if defined(CONFIG_MMIO_NVRAM) + mmio_nvram_init(); +#endif + + printk(KERN_INFO "GE Intelligent Platforms IMP3A 3U cPCI SBC\n"); +} + +/* Return the PCB revision */ +static unsigned int ge_imp3a_get_pcb_rev(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs); + return (reg >> 8) & 0xff; +} + +/* Return the board (software) revision */ +static unsigned int ge_imp3a_get_board_rev(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x2); + return reg & 0xff; +} + +/* Return the FPGA revision */ +static unsigned int ge_imp3a_get_fpga_rev(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x2); + return (reg >> 8) & 0xff; +} + +/* Return compactPCI Geographical Address */ +static unsigned int ge_imp3a_get_cpci_geo_addr(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x6); + return (reg & 0x0f00) >> 8; +} + +/* Return compactPCI System Controller Status */ +static unsigned int ge_imp3a_get_cpci_is_syscon(void) +{ + unsigned int reg; + + reg = ioread16(imp3a_regs + 0x6); + return reg & (1 << 12); +} + +static void ge_imp3a_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n"); + + seq_printf(m, "Revision\t: %u%c\n", ge_imp3a_get_pcb_rev(), + ('A' + ge_imp3a_get_board_rev() - 1)); + + seq_printf(m, "FPGA Revision\t: %u\n", ge_imp3a_get_fpga_rev()); + + seq_printf(m, "cPCI geo. addr\t: %u\n", ge_imp3a_get_cpci_geo_addr()); + + seq_printf(m, "cPCI syscon\t: %s\n", + ge_imp3a_get_cpci_is_syscon() ? "yes" : "no"); +} + +machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices); + +define_machine(ge_imp3a) { + .name = "GE_IMP3A", + .compatible = "ge,IMP3A", + .setup_arch = ge_imp3a_setup_arch, + .init_IRQ = ge_imp3a_pic_init, + .show_cpuinfo = ge_imp3a_show_cpuinfo, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c new file mode 100644 index 0000000000..1b6326a4b0 --- /dev/null +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -0,0 +1,184 @@ +/* + * Board setup routines for the Emerson KSI8560 + * + * Author: Alexandr Smirnov + * + * Based on mpc85xx_ads.c maintained by Kumar Gala + * + * 2008 (c) MontaVista, Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "mpc85xx.h" + +#define KSI8560_CPLD_HVR 0x04 /* Hardware Version Register */ +#define KSI8560_CPLD_PVR 0x08 /* PLD Version Register */ +#define KSI8560_CPLD_RCR1 0x30 /* Reset Command Register 1 */ + +#define KSI8560_CPLD_RCR1_CPUHR 0x80 /* CPU Hard Reset */ + +static void __iomem *cpld_base = NULL; + +static void __noreturn machine_restart(char *cmd) +{ + if (cpld_base) + out_8(cpld_base + KSI8560_CPLD_RCR1, KSI8560_CPLD_RCR1_CPUHR); + else + printk(KERN_ERR "Can't find CPLD base, hang forever\n"); + + for (;;); +} + +static void __init ksi8560_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +#ifdef CONFIG_CPM2 +/* + * Setup I/O ports + */ +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin __initdata ksi8560_pins[] = { + /* SCC1 */ + {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* SCC2 */ + {3, 26, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + + /* FCC1 */ + {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, + {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, + {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, + {2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK9 */ + {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK10 */ + +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ksi8560_pins); i++) { + struct cpm_pin *pin = &ksi8560_pins[i]; + cpm2_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_RX); + cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX); +} +#endif + +/* + * Setup the architecture + */ +static void __init ksi8560_setup_arch(void) +{ + struct device_node *cpld; + + cpld = of_find_compatible_node(NULL, NULL, "emerson,KSI8560-cpld"); + if (cpld) + cpld_base = of_iomap(cpld, 0); + else + printk(KERN_ERR "Can't find CPLD in device tree\n"); + + of_node_put(cpld); + + if (ppc_md.progress) + ppc_md.progress("ksi8560_setup_arch()", 0); + +#ifdef CONFIG_CPM2 + cpm2_reset(); + init_ioports(); +#endif +} + +static void ksi8560_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Emerson Network Power\n"); + seq_printf(m, "Board\t\t: KSI8560\n"); + + if (cpld_base) { + seq_printf(m, "Hardware rev\t: %d\n", + in_8(cpld_base + KSI8560_CPLD_HVR)); + seq_printf(m, "CPLD rev\t: %d\n", + in_8(cpld_base + KSI8560_CPLD_PVR)); + } else + seq_printf(m, "Unknown Hardware and CPLD revs\n"); + + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +machine_device_initcall(ksi8560, mpc85xx_common_publish_devices); + +define_machine(ksi8560) { + .name = "KSI8560", + .compatible = "emerson,KSI8560", + .setup_arch = ksi8560_setup_arch, + .init_IRQ = ksi8560_pic_init, + .show_cpuinfo = ksi8560_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = machine_restart, +}; diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c new file mode 100644 index 0000000000..e966b2ad8e --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC8536 DS Board Setup + * + * Copyright 2008 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" + +void __init mpc8536_ds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init mpc8536_ds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc8536_ds_setup_arch()", 0); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); + + printk("MPC8536 DS board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices); + +define_machine(mpc8536_ds) { + .name = "MPC8536 DS", + .compatible = "fsl,mpc8536ds", + .setup_arch = mpc8536_ds_setup_arch, + .init_IRQ = mpc8536_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h new file mode 100644 index 0000000000..c764d7551e --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef MPC85xx_H +#define MPC85xx_H +extern int mpc85xx_common_publish_devices(void); + +#ifdef CONFIG_CPM2 +extern void mpc85xx_cpm2_pic_init(void); +#else +static inline void __init mpc85xx_cpm2_pic_init(void) {} +#endif /* CONFIG_CPM2 */ + +#ifdef CONFIG_QUICC_ENGINE +extern void mpc85xx_qe_par_io_init(void); +#else +static inline void __init mpc85xx_qe_par_io_init(void) {} +#endif + +#ifdef CONFIG_PPC_I8259 +void __init mpc85xx_8259_init(void); +#else +static inline void __init mpc85xx_8259_init(void) {} +#endif + +#endif diff --git a/arch/powerpc/platforms/85xx/mpc85xx_8259.c b/arch/powerpc/platforms/85xx/mpc85xx_8259.c new file mode 100644 index 0000000000..cb00d596ad --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_8259.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC85xx 8259 functions for DS Board Setup + * + * Author Xianghua Xiao (x.xiao@freescale.com) + * Roy Zang + * - Add PCI/PCI Express support + * Copyright 2007 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" + +static void mpc85xx_8259_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +void __init mpc85xx_8259_init(void) +{ + struct device_node *np; + struct device_node *cascade_node = NULL; + int cascade_irq; + + /* Initialize the i8259 controller */ + for_each_node_by_type(np, "interrupt-controller") { + if (of_device_is_compatible(np, "chrp,iic")) { + cascade_node = np; + break; + } + } + + if (cascade_node == NULL) { + pr_debug("i8259: Could not find i8259 PIC\n"); + return; + } + + cascade_irq = irq_of_parse_and_map(cascade_node, 0); + if (!cascade_irq) { + pr_err("i8259: Failed to map cascade interrupt\n"); + return; + } + + pr_debug("i8259: cascade mapped to irq %d\n", cascade_irq); + + i8259_init(cascade_node, 0); + of_node_put(cascade_node); + + irq_set_chained_handler(cascade_irq, mpc85xx_8259_cascade); +} diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c new file mode 100644 index 0000000000..2856148321 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC85xx DS Board Setup + * + * Author Xianghua Xiao (x.xiao@freescale.com) + * Roy Zang + * - Add PCI/PCI Exprees support + * Copyright 2007 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +static void __init mpc85xx_ds_pic_init(void) +{ + struct mpic *mpic; + int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU; + + if (of_machine_is_compatible("fsl,MPC8572DS-CAMP")) + flags |= MPIC_NO_RESET; + + mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC "); + + if (WARN_ON(!mpic)) + return; + + mpic_init(mpic); + + mpc85xx_8259_init(); +} + +/* + * Setup the architecture + */ +static void __init mpc85xx_ds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc85xx_ds_setup_arch()", 0); + + swiotlb_detect_4g(); + fsl_pci_assign_primary(); + uli_init(); + mpc85xx_smp_init(); + + pr_info("MPC85xx DS board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices); + +define_machine(mpc8544_ds) { + .name = "MPC8544 DS", + .compatible = "MPC8544DS", + .setup_arch = mpc85xx_ds_setup_arch, + .init_IRQ = mpc85xx_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(mpc8572_ds) { + .name = "MPC8572 DS", + .compatible = "fsl,MPC8572DS", + .setup_arch = mpc85xx_ds_setup_arch, + .init_IRQ = mpc85xx_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c new file mode 100644 index 0000000000..c19490cf63 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2006-2010, 2012-2013 Freescale Semiconductor, Inc. + * All rights reserved. + * + * Author: Andy Fleming + * + * Based on 83xx/mpc8360e_pb.c by: + * Li Yang + * Yin Olivia + * + * Description: + * MPC85xx MDS board specific routines. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +#if IS_BUILTIN(CONFIG_PHYLIB) + +#define MV88E1111_SCR 0x10 +#define MV88E1111_SCR_125CLK 0x0010 +static int mpc8568_fixup_125_clock(struct phy_device *phydev) +{ + int scr; + int err; + + /* Workaround for the 125 CLK Toggle */ + scr = phy_read(phydev, MV88E1111_SCR); + + if (scr < 0) + return scr; + + err = phy_write(phydev, MV88E1111_SCR, scr & ~(MV88E1111_SCR_125CLK)); + + if (err) + return err; + + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err) + return err; + + scr = phy_read(phydev, MV88E1111_SCR); + + if (scr < 0) + return scr; + + err = phy_write(phydev, MV88E1111_SCR, scr | 0x0008); + + return err; +} + +static int mpc8568_mds_phy_fixups(struct phy_device *phydev) +{ + int temp; + int err; + + /* Errata */ + err = phy_write(phydev,29, 0x0006); + + if (err) + return err; + + temp = phy_read(phydev, 30); + + if (temp < 0) + return temp; + + temp = (temp & (~0x8000)) | 0x4000; + err = phy_write(phydev,30, temp); + + if (err) + return err; + + err = phy_write(phydev,29, 0x000a); + + if (err) + return err; + + temp = phy_read(phydev, 30); + + if (temp < 0) + return temp; + + temp = phy_read(phydev, 30); + + if (temp < 0) + return temp; + + temp &= ~0x0020; + + err = phy_write(phydev,30,temp); + + if (err) + return err; + + /* Disable automatic MDI/MDIX selection */ + temp = phy_read(phydev, 16); + + if (temp < 0) + return temp; + + temp &= ~0x0060; + err = phy_write(phydev,16,temp); + + return err; +} + +#endif + +/* ************************************************************************ + * + * Setup the architecture + * + */ +#ifdef CONFIG_QUICC_ENGINE +static void __init mpc85xx_mds_reset_ucc_phys(void) +{ + struct device_node *np; + static u8 __iomem *bcsr_regs; + + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (!np) + return; + + bcsr_regs = of_iomap(np, 0); + of_node_put(np); + if (!bcsr_regs) + return; + + if (machine_is(mpc8568_mds)) { +#define BCSR_UCC1_GETH_EN (0x1 << 7) +#define BCSR_UCC2_GETH_EN (0x1 << 7) +#define BCSR_UCC1_MODE_MSK (0x3 << 4) +#define BCSR_UCC2_MODE_MSK (0x3 << 0) + + /* Turn off UCC1 & UCC2 */ + clrbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN); + clrbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN); + + /* Mode is RGMII, all bits clear */ + clrbits8(&bcsr_regs[11], BCSR_UCC1_MODE_MSK | + BCSR_UCC2_MODE_MSK); + + /* Turn UCC1 & UCC2 on */ + setbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN); + setbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN); + } else if (machine_is(mpc8569_mds)) { +#define BCSR7_UCC12_GETHnRST (0x1 << 2) +#define BCSR8_UEM_MARVELL_RST (0x1 << 1) +#define BCSR_UCC_RGMII (0x1 << 6) +#define BCSR_UCC_RTBI (0x1 << 5) + /* + * U-Boot mangles interrupt polarity for Marvell PHYs, + * so reset built-in and UEM Marvell PHYs, this puts + * the PHYs into their normal state. + */ + clrbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST); + setbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST); + + setbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST); + clrbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST); + + for_each_compatible_node(np, "network", "ucc_geth") { + const unsigned int *prop; + int ucc_num; + + prop = of_get_property(np, "cell-index", NULL); + if (prop == NULL) + continue; + + ucc_num = *prop - 1; + + prop = of_get_property(np, "phy-connection-type", NULL); + if (prop == NULL) + continue; + + if (strcmp("rtbi", (const char *)prop) == 0) + clrsetbits_8(&bcsr_regs[7 + ucc_num], + BCSR_UCC_RGMII, BCSR_UCC_RTBI); + } + } else if (machine_is(p1021_mds)) { +#define BCSR11_ENET_MICRST (0x1 << 5) + /* Reset Micrel PHY */ + clrbits8(&bcsr_regs[11], BCSR11_ENET_MICRST); + setbits8(&bcsr_regs[11], BCSR11_ENET_MICRST); + } + + iounmap(bcsr_regs); +} + +static void __init mpc85xx_mds_qe_init(void) +{ + struct device_node *np; + + mpc85xx_qe_par_io_init(); + mpc85xx_mds_reset_ucc_phys(); + + if (machine_is(p1021_mds)) { + + struct ccsr_guts __iomem *guts; + + np = of_find_node_by_name(NULL, "global-utilities"); + if (np) { + guts = of_iomap(np, 0); + if (!guts) + pr_err("mpc85xx-rdb: could not map global utilities register\n"); + else{ + /* P1021 has pins muxed for QE and other functions. To + * enable QE UEC mode, we need to set bit QE0 for UCC1 + * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 + * and QE12 for QE MII management signals in PMUXCR + * register. + */ + setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | + MPC85xx_PMUXCR_QE(3) | + MPC85xx_PMUXCR_QE(9) | + MPC85xx_PMUXCR_QE(12)); + iounmap(guts); + } + of_node_put(np); + } + + } +} + +#else +static void __init mpc85xx_mds_qe_init(void) { } +#endif /* CONFIG_QUICC_ENGINE */ + +static void __init mpc85xx_mds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc85xx_mds_setup_arch()", 0); + + mpc85xx_smp_init(); + + mpc85xx_mds_qe_init(); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); +} + +#if IS_BUILTIN(CONFIG_PHYLIB) + +static int __init board_fixups(void) +{ + char phy_id[20]; + char *compstrs[2] = {"fsl,gianfar-mdio", "fsl,ucc-mdio"}; + struct device_node *mdio; + struct resource res; + int i; + + for (i = 0; i < ARRAY_SIZE(compstrs); i++) { + mdio = of_find_compatible_node(NULL, NULL, compstrs[i]); + + of_address_to_resource(mdio, 0, &res); + snprintf(phy_id, sizeof(phy_id), "%llx:%02x", + (unsigned long long)res.start, 1); + + phy_register_fixup_for_id(phy_id, mpc8568_fixup_125_clock); + phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups); + + /* Register a workaround for errata */ + snprintf(phy_id, sizeof(phy_id), "%llx:%02x", + (unsigned long long)res.start, 7); + phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups); + + of_node_put(mdio); + } + + return 0; +} + +machine_arch_initcall(mpc8568_mds, board_fixups); +machine_arch_initcall(mpc8569_mds, board_fixups); + +#endif + +static int __init mpc85xx_publish_devices(void) +{ + return mpc85xx_common_publish_devices(); +} + +machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices); +machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices); +machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices); + +static void __init mpc85xx_mds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + +define_machine(mpc8568_mds) { + .name = "MPC8568 MDS", + .compatible = "MPC85xxMDS", + .setup_arch = mpc85xx_mds_setup_arch, + .init_IRQ = mpc85xx_mds_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; + +define_machine(mpc8569_mds) { + .name = "MPC8569 MDS", + .compatible = "fsl,MPC8569EMDS", + .setup_arch = mpc85xx_mds_setup_arch, + .init_IRQ = mpc85xx_mds_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; + +define_machine(p1021_mds) { + .name = "P1021 MDS", + .compatible = "fsl,P1021MDS", + .setup_arch = mpc85xx_mds_setup_arch, + .init_IRQ = mpc85xx_mds_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c new file mode 100644 index 0000000000..f7ac92a8ae --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC85xx PM operators + * + * Copyright 2015 Freescale Semiconductor Inc. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include + +#include +#include + +#include "smp.h" + +static struct ccsr_guts __iomem *guts; + +#ifdef CONFIG_FSL_PMC +static void mpc85xx_irq_mask(int cpu) +{ + +} + +static void mpc85xx_irq_unmask(int cpu) +{ + +} + +static void mpc85xx_cpu_die(int cpu) +{ + u32 tmp; + + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; + mtspr(SPRN_HID0, tmp); + + /* Enter NAP mode. */ + tmp = mfmsr(); + tmp |= MSR_WE; + asm volatile( + "msync\n" + "mtmsr %0\n" + "isync\n" + : + : "r" (tmp)); +} + +static void mpc85xx_cpu_up_prepare(int cpu) +{ + +} +#endif + +static void mpc85xx_freeze_time_base(bool freeze) +{ + uint32_t mask; + + mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; + if (freeze) + setbits32(&guts->devdisr, mask); + else + clrbits32(&guts->devdisr, mask); + + in_be32(&guts->devdisr); +} + +static const struct of_device_id mpc85xx_smp_guts_ids[] = { + { .compatible = "fsl,mpc8572-guts", }, + { .compatible = "fsl,p1020-guts", }, + { .compatible = "fsl,p1021-guts", }, + { .compatible = "fsl,p1022-guts", }, + { .compatible = "fsl,p1023-guts", }, + { .compatible = "fsl,p2020-guts", }, + { .compatible = "fsl,bsc9132-guts", }, + {}, +}; + +static const struct fsl_pm_ops mpc85xx_pm_ops = { + .freeze_time_base = mpc85xx_freeze_time_base, +#ifdef CONFIG_FSL_PMC + .irq_mask = mpc85xx_irq_mask, + .irq_unmask = mpc85xx_irq_unmask, + .cpu_die = mpc85xx_cpu_die, + .cpu_up_prepare = mpc85xx_cpu_up_prepare, +#endif +}; + +int __init mpc85xx_setup_pmc(void) +{ + struct device_node *np; + + np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids); + if (np) { + guts = of_iomap(np, 0); + of_node_put(np); + if (!guts) { + pr_err("Could not map guts node address\n"); + return -ENOMEM; + } + qoriq_pm_ops = &mpc85xx_pm_ops; + } + + return 0; +} diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c new file mode 100644 index 0000000000..ec9f60fbeb --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MPC85xx RDB Board Setup + * + * Copyright 2009,2012-2013 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +static void __init mpc85xx_rdb_pic_init(void) +{ + struct mpic *mpic; + int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU; + + if (of_machine_is_compatible("fsl,MPC85XXRDB-CAMP")) + flags |= MPIC_NO_RESET; + + mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC "); + + if (WARN_ON(!mpic)) + return; + + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init mpc85xx_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mpc85xx_rdb_setup_arch()", 0); + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); + + mpc85xx_qe_par_io_init(); +#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE) + if (machine_is(p1025_rdb)) { + struct device_node *np; + + struct ccsr_guts __iomem *guts; + + np = of_find_node_by_name(NULL, "global-utilities"); + if (np) { + guts = of_iomap(np, 0); + if (!guts) { + + pr_err("mpc85xx-rdb: could not map global utilities register\n"); + + } else { + /* P1025 has pins muxed for QE and other functions. To + * enable QE UEC mode, we need to set bit QE0 for UCC1 + * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 + * and QE12 for QE MII management singals in PMUXCR + * register. + */ + setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | + MPC85xx_PMUXCR_QE(3) | + MPC85xx_PMUXCR_QE(9) | + MPC85xx_PMUXCR_QE(12)); + iounmap(guts); + } + of_node_put(np); + } + + } +#endif + + pr_info("MPC85xx RDB board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb_pd, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices); + +define_machine(p1020_rdb) { + .name = "P1020 RDB", + .compatible = "fsl,P1020RDB", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1021_rdb_pc) { + .name = "P1021 RDB-PC", + .compatible = "fsl,P1021RDB-PC", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1025_rdb) { + .name = "P1025 RDB", + .compatible = "fsl,P1025RDB", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1020_mbg_pc) { + .name = "P1020 MBG-PC", + .compatible = "fsl,P1020MBG-PC", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1020_utm_pc) { + .name = "P1020 UTM-PC", + .compatible = "fsl,P1020UTM-PC", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1020_rdb_pc) { + .name = "P1020RDB-PC", + .compatible = "fsl,P1020RDB-PC", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1020_rdb_pd) { + .name = "P1020RDB-PD", + .compatible = "fsl,P1020RDB-PD", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(p1024_rdb) { + .name = "P1024 RDB", + .compatible = "fsl,P1024RDB", + .setup_arch = mpc85xx_rdb_setup_arch, + .init_IRQ = mpc85xx_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c new file mode 100644 index 0000000000..1b59e45a0c --- /dev/null +++ b/arch/powerpc/platforms/85xx/mvme2500.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Board setup routines for the Emerson/Artesyn MVME2500 + * + * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A. + * + * Based on earlier code by: + * + * Xianghua Xiao (x.xiao@freescale.com) + * Tom Armistead (tom.armistead@emerson.com) + * Copyright 2012 Emerson + * + * Author Alessio Igor Bogani + */ + +#include +#include +#include +#include +#include + +#include "mpc85xx.h" + +void __init mvme2500_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init mvme2500_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mvme2500_setup_arch()", 0); + fsl_pci_assign_primary(); + pr_info("MVME2500 board from Artesyn\n"); +} + +machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices); + +define_machine(mvme2500) { + .name = "MVME2500", + .compatible = "artesyn,MVME2500", + .setup_arch = mvme2500_setup_arch, + .init_IRQ = mvme2500_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c new file mode 100644 index 0000000000..10d6f1fa33 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * P1010RDB Board Setup + * + * Copyright 2011 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" + +void __init p1010_rdb_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + + +/* + * Setup the architecture + */ +static void __init p1010_rdb_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("p1010_rdb_setup_arch()", 0); + + fsl_pci_assign_primary(); + + printk(KERN_INFO "P1010 RDB board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p1010_rdb_probe(void) +{ + if (of_machine_is_compatible("fsl,P1010RDB")) + return 1; + if (of_machine_is_compatible("fsl,P1010RDB-PB")) + return 1; + return 0; +} + +define_machine(p1010_rdb) { + .name = "P1010 RDB", + .probe = p1010_rdb_probe, + .setup_arch = p1010_rdb_setup_arch, + .init_IRQ = p1010_rdb_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c new file mode 100644 index 0000000000..0dd786a061 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -0,0 +1,563 @@ +/* + * P1022DS board specific routines + * + * Authors: Travis Wheatley + * Dave Liu + * Timur Tabi + * + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This file is taken from the Freescale P1022DS BSP, with modifications: + * 2) No AMP support + * 3) No PCI endpoint support + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +#define PMUXCR_ELBCDIU_MASK 0xc0000000 +#define PMUXCR_ELBCDIU_NOR16 0x80000000 +#define PMUXCR_ELBCDIU_DIU 0x40000000 + +/* + * Board-specific initialization of the DIU. This code should probably be + * executed when the DIU is opened, rather than in arch code, but the DIU + * driver does not have a mechanism for this (yet). + * + * This is especially problematic on the P1022DS because the local bus (eLBC) + * and the DIU video signals share the same pins, which means that enabling the + * DIU will disable access to NOR flash. + */ + +/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */ +#define CLKDVDR_PXCKEN 0x80000000 +#define CLKDVDR_PXCKINV 0x10000000 +#define CLKDVDR_PXCKDLY 0x06000000 +#define CLKDVDR_PXCLK_MASK 0x00FF0000 + +/* Some ngPIXIS register definitions */ +#define PX_CTL 3 +#define PX_BRDCFG0 8 +#define PX_BRDCFG1 9 + +#define PX_BRDCFG0_ELBC_SPI_MASK 0xc0 +#define PX_BRDCFG0_ELBC_SPI_ELBC 0x00 +#define PX_BRDCFG0_ELBC_SPI_NULL 0xc0 +#define PX_BRDCFG0_ELBC_DIU 0x02 + +#define PX_BRDCFG1_DVIEN 0x80 +#define PX_BRDCFG1_DFPEN 0x40 +#define PX_BRDCFG1_BACKLIGHT 0x20 +#define PX_BRDCFG1_DDCEN 0x10 + +#define PX_CTL_ALTACC 0x80 + +/* + * DIU Area Descriptor + * + * Note that we need to byte-swap the value before it's written to the AD + * register. So even though the registers don't look like they're in the same + * bit positions as they are on the MPC8610, the same value is written to the + * AD register on the MPC8610 and on the P1022. + */ +#define AD_BYTE_F 0x10000000 +#define AD_ALPHA_C_MASK 0x0E000000 +#define AD_ALPHA_C_SHIFT 25 +#define AD_BLUE_C_MASK 0x01800000 +#define AD_BLUE_C_SHIFT 23 +#define AD_GREEN_C_MASK 0x00600000 +#define AD_GREEN_C_SHIFT 21 +#define AD_RED_C_MASK 0x00180000 +#define AD_RED_C_SHIFT 19 +#define AD_PALETTE 0x00040000 +#define AD_PIXEL_S_MASK 0x00030000 +#define AD_PIXEL_S_SHIFT 16 +#define AD_COMP_3_MASK 0x0000F000 +#define AD_COMP_3_SHIFT 12 +#define AD_COMP_2_MASK 0x00000F00 +#define AD_COMP_2_SHIFT 8 +#define AD_COMP_1_MASK 0x000000F0 +#define AD_COMP_1_SHIFT 4 +#define AD_COMP_0_MASK 0x0000000F +#define AD_COMP_0_SHIFT 0 + +#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \ + cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \ + (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \ + (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \ + (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \ + (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT)) + +struct fsl_law { + u32 lawbar; + u32 reserved1; + u32 lawar; + u32 reserved[5]; +}; + +#define LAWBAR_MASK 0x00F00000 +#define LAWBAR_SHIFT 12 + +#define LAWAR_EN 0x80000000 +#define LAWAR_TGT_MASK 0x01F00000 +#define LAW_TRGT_IF_LBC (0x04 << 20) + +#define LAWAR_MASK (LAWAR_EN | LAWAR_TGT_MASK) +#define LAWAR_MATCH (LAWAR_EN | LAW_TRGT_IF_LBC) + +#define BR_BA 0xFFFF8000 + +/* + * Map a BRx value to a physical address + * + * The localbus BRx registers only store the lower 32 bits of the address. To + * obtain the upper four bits, we need to scan the LAW table. The entry which + * maps to the localbus will contain the upper four bits. + */ +static phys_addr_t lbc_br_to_phys(const void *ecm, unsigned int count, u32 br) +{ +#ifndef CONFIG_PHYS_64BIT + /* + * If we only have 32-bit addressing, then the BRx address *is* the + * physical address. + */ + return br & BR_BA; +#else + const struct fsl_law *law = ecm + 0xc08; + unsigned int i; + + for (i = 0; i < count; i++) { + u64 lawbar = in_be32(&law[i].lawbar); + u32 lawar = in_be32(&law[i].lawar); + + if ((lawar & LAWAR_MASK) == LAWAR_MATCH) + /* Extract the upper four bits */ + return (br & BR_BA) | ((lawbar & LAWBAR_MASK) << 12); + } + + return 0; +#endif +} + +/** + * p1022ds_set_monitor_port: switch the output to a different monitor port + */ +static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port) +{ + struct device_node *guts_node; + struct device_node *lbc_node = NULL; + struct device_node *law_node = NULL; + struct ccsr_guts __iomem *guts; + struct fsl_lbc_regs *lbc = NULL; + void *ecm = NULL; + u8 __iomem *lbc_lcs0_ba = NULL; + u8 __iomem *lbc_lcs1_ba = NULL; + phys_addr_t cs0_addr, cs1_addr; + u32 br0, or0, br1, or1; + const __be32 *iprop; + unsigned int num_laws; + u8 b; + + /* Map the global utilities registers. */ + guts_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts"); + if (!guts_node) { + pr_err("p1022ds: missing global utilities device node\n"); + return; + } + + guts = of_iomap(guts_node, 0); + if (!guts) { + pr_err("p1022ds: could not map global utilities device\n"); + goto exit; + } + + lbc_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc"); + if (!lbc_node) { + pr_err("p1022ds: missing localbus node\n"); + goto exit; + } + + lbc = of_iomap(lbc_node, 0); + if (!lbc) { + pr_err("p1022ds: could not map localbus node\n"); + goto exit; + } + + law_node = of_find_compatible_node(NULL, NULL, "fsl,ecm-law"); + if (!law_node) { + pr_err("p1022ds: missing local access window node\n"); + goto exit; + } + + ecm = of_iomap(law_node, 0); + if (!ecm) { + pr_err("p1022ds: could not map local access window node\n"); + goto exit; + } + + iprop = of_get_property(law_node, "fsl,num-laws", NULL); + if (!iprop) { + pr_err("p1022ds: LAW node is missing fsl,num-laws property\n"); + goto exit; + } + num_laws = be32_to_cpup(iprop); + + /* + * Indirect mode requires both BR0 and BR1 to be set to "GPCM", + * otherwise writes to these addresses won't actually appear on the + * local bus, and so the PIXIS won't see them. + * + * In FCM mode, writes go to the NAND controller, which does not pass + * them to the localbus directly. So we force BR0 and BR1 into GPCM + * mode, since we don't care about what's behind the localbus any + * more. + */ + br0 = in_be32(&lbc->bank[0].br); + br1 = in_be32(&lbc->bank[1].br); + or0 = in_be32(&lbc->bank[0].or); + or1 = in_be32(&lbc->bank[1].or); + + /* Make sure CS0 and CS1 are programmed */ + if (!(br0 & BR_V) || !(br1 & BR_V)) { + pr_err("p1022ds: CS0 and/or CS1 is not programmed\n"); + goto exit; + } + + /* + * Use the existing BRx/ORx values if it's already GPCM. Otherwise, + * force the values to simple 32KB GPCM windows with the most + * conservative timing. + */ + if ((br0 & BR_MSEL) != BR_MS_GPCM) { + br0 = (br0 & BR_BA) | BR_V; + or0 = 0xFFFF8000 | 0xFF7; + out_be32(&lbc->bank[0].br, br0); + out_be32(&lbc->bank[0].or, or0); + } + if ((br1 & BR_MSEL) != BR_MS_GPCM) { + br1 = (br1 & BR_BA) | BR_V; + or1 = 0xFFFF8000 | 0xFF7; + out_be32(&lbc->bank[1].br, br1); + out_be32(&lbc->bank[1].or, or1); + } + + cs0_addr = lbc_br_to_phys(ecm, num_laws, br0); + if (!cs0_addr) { + pr_err("p1022ds: could not determine physical address for CS0" + " (BR0=%08x)\n", br0); + goto exit; + } + cs1_addr = lbc_br_to_phys(ecm, num_laws, br1); + if (!cs1_addr) { + pr_err("p1022ds: could not determine physical address for CS1" + " (BR1=%08x)\n", br1); + goto exit; + } + + lbc_lcs0_ba = ioremap(cs0_addr, 1); + if (!lbc_lcs0_ba) { + pr_err("p1022ds: could not ioremap CS0 address %llx\n", + (unsigned long long)cs0_addr); + goto exit; + } + lbc_lcs1_ba = ioremap(cs1_addr, 1); + if (!lbc_lcs1_ba) { + pr_err("p1022ds: could not ioremap CS1 address %llx\n", + (unsigned long long)cs1_addr); + goto exit; + } + + /* Make sure we're in indirect mode first. */ + if ((in_be32(&guts->pmuxcr) & PMUXCR_ELBCDIU_MASK) != + PMUXCR_ELBCDIU_DIU) { + struct device_node *pixis_node; + void __iomem *pixis; + + pixis_node = + of_find_compatible_node(NULL, NULL, "fsl,p1022ds-fpga"); + if (!pixis_node) { + pr_err("p1022ds: missing pixis node\n"); + goto exit; + } + + pixis = of_iomap(pixis_node, 0); + of_node_put(pixis_node); + if (!pixis) { + pr_err("p1022ds: could not map pixis registers\n"); + goto exit; + } + + /* Enable indirect PIXIS mode. */ + setbits8(pixis + PX_CTL, PX_CTL_ALTACC); + iounmap(pixis); + + /* Switch the board mux to the DIU */ + out_8(lbc_lcs0_ba, PX_BRDCFG0); /* BRDCFG0 */ + b = in_8(lbc_lcs1_ba); + b |= PX_BRDCFG0_ELBC_DIU; + out_8(lbc_lcs1_ba, b); + + /* Set the chip mux to DIU mode. */ + clrsetbits_be32(&guts->pmuxcr, PMUXCR_ELBCDIU_MASK, + PMUXCR_ELBCDIU_DIU); + in_be32(&guts->pmuxcr); + } + + + switch (port) { + case FSL_DIU_PORT_DVI: + /* Enable the DVI port, disable the DFP and the backlight */ + out_8(lbc_lcs0_ba, PX_BRDCFG1); + b = in_8(lbc_lcs1_ba); + b &= ~(PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT); + b |= PX_BRDCFG1_DVIEN; + out_8(lbc_lcs1_ba, b); + break; + case FSL_DIU_PORT_LVDS: + /* + * LVDS also needs backlight enabled, otherwise the display + * will be blank. + */ + /* Enable the DFP port, disable the DVI and the backlight */ + out_8(lbc_lcs0_ba, PX_BRDCFG1); + b = in_8(lbc_lcs1_ba); + b &= ~PX_BRDCFG1_DVIEN; + b |= PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT; + out_8(lbc_lcs1_ba, b); + break; + default: + pr_err("p1022ds: unsupported monitor port %i\n", port); + } + +exit: + if (lbc_lcs1_ba) + iounmap(lbc_lcs1_ba); + if (lbc_lcs0_ba) + iounmap(lbc_lcs0_ba); + if (lbc) + iounmap(lbc); + if (ecm) + iounmap(ecm); + if (guts) + iounmap(guts); + + of_node_put(law_node); + of_node_put(lbc_node); + of_node_put(guts_node); +} + +/** + * p1022ds_set_pixel_clock: program the DIU's clock + * + * @pixclock: the wavelength, in picoseconds, of the clock + */ +void p1022ds_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *guts_np = NULL; + struct ccsr_guts __iomem *guts; + unsigned long freq; + u64 temp; + u32 pxclk; + + /* Map the global utilities registers. */ + guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts"); + if (!guts_np) { + pr_err("p1022ds: missing global utilities device node\n"); + return; + } + + guts = of_iomap(guts_np, 0); + of_node_put(guts_np); + if (!guts) { + pr_err("p1022ds: could not map global utilities device\n"); + return; + } + + /* Convert pixclock from a wavelength to a frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. + * This number is programmed into the CLKDVDR register, and the valid + * range of values is 2-255. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(&guts->clkdvdr, + CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16)); + + iounmap(guts); +} + +/** + * p1022ds_valid_monitor_port: set the monitor port for sysfs + */ +enum fsl_diu_monitor_port +p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + switch (port) { + case FSL_DIU_PORT_DVI: + case FSL_DIU_PORT_LVDS: + return port; + default: + return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */ + } +} + +#endif + +void __init p1022_ds_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +/* TRUE if there is a "video=fslfb" command-line parameter. */ +static bool fslfb; + +/* + * Search for a "video=fslfb" command-line parameter, and set 'fslfb' to + * true if we find it. + * + * We need to use early_param() instead of __setup() because the normal + * __setup() gets called to late. However, early_param() gets called very + * early, before the device tree is unflattened, so all we can do now is set a + * global variable. Later on, p1022_ds_setup_arch() will use that variable + * to determine if we need to update the device tree. + */ +static int __init early_video_setup(char *options) +{ + fslfb = (strncmp(options, "fslfb:", 6) == 0); + + return 0; +} +early_param("video", early_video_setup); + +#endif + +/* + * Setup the architecture + */ +static void __init p1022_ds_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("p1022_ds_setup_arch()", 0); + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + diu_ops.set_monitor_port = p1022ds_set_monitor_port; + diu_ops.set_pixel_clock = p1022ds_set_pixel_clock; + diu_ops.valid_monitor_port = p1022ds_valid_monitor_port; + + /* + * Disable the NOR and NAND flash nodes if there is video=fslfb... + * command-line parameter. When the DIU is active, the localbus is + * unavailable, so we have to disable these nodes before the MTD + * driver loads. + */ + if (fslfb) { + struct device_node *np = + of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc"); + + if (np) { + struct device_node *np2; + + of_node_get(np); + np2 = of_find_compatible_node(np, NULL, "cfi-flash"); + if (np2) { + static struct property nor_status = { + .name = "status", + .value = "disabled", + .length = sizeof("disabled"), + }; + + /* + * of_update_property() is called before + * kmalloc() is available, so the 'new' object + * should be allocated in the global area. + * The easiest way is to do that is to + * allocate one static local variable for each + * call to this function. + */ + pr_info("p1022ds: disabling %pOF node", + np2); + of_update_property(np2, &nor_status); + of_node_put(np2); + } + + of_node_get(np); + np2 = of_find_compatible_node(np, NULL, + "fsl,elbc-fcm-nand"); + if (np2) { + static struct property nand_status = { + .name = "status", + .value = "disabled", + .length = sizeof("disabled"), + }; + + pr_info("p1022ds: disabling %pOF node", + np2); + of_update_property(np2, &nand_status); + of_node_put(np2); + } + + of_node_put(np); + } + + } + +#endif + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); + + pr_info("Freescale P1022 DS reference board\n"); +} + +machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices); + +define_machine(p1022_ds) { + .name = "P1022 DS", + .compatible = "fsl,p1022ds", + .setup_arch = p1022_ds_setup_arch, + .init_IRQ = p1022_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c new file mode 100644 index 0000000000..25ab6e9c14 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -0,0 +1,143 @@ +/* + * P1022 RDK board specific routines + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * Author: Timur Tabi + * + * Based on p1022_ds.c + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */ +#define CLKDVDR_PXCKEN 0x80000000 +#define CLKDVDR_PXCKINV 0x10000000 +#define CLKDVDR_PXCKDLY 0x06000000 +#define CLKDVDR_PXCLK_MASK 0x00FF0000 + +/** + * p1022rdk_set_pixel_clock: program the DIU's clock + * + * @pixclock: the wavelength, in picoseconds, of the clock + */ +void p1022rdk_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *guts_np = NULL; + struct ccsr_guts __iomem *guts; + unsigned long freq; + u64 temp; + u32 pxclk; + + /* Map the global utilities registers. */ + guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts"); + if (!guts_np) { + pr_err("p1022rdk: missing global utilities device node\n"); + return; + } + + guts = of_iomap(guts_np, 0); + of_node_put(guts_np); + if (!guts) { + pr_err("p1022rdk: could not map global utilities device\n"); + return; + } + + /* Convert pixclock from a wavelength to a frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. + * This number is programmed into the CLKDVDR register, and the valid + * range of values is 2-255. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(&guts->clkdvdr, + CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16)); + + iounmap(guts); +} + +/** + * p1022rdk_valid_monitor_port: set the monitor port for sysfs + */ +enum fsl_diu_monitor_port +p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + return FSL_DIU_PORT_DVI; +} + +#endif + +void __init p1022_rdk_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init p1022_rdk_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("p1022_rdk_setup_arch()", 0); + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + diu_ops.set_pixel_clock = p1022rdk_set_pixel_clock; + diu_ops.valid_monitor_port = p1022rdk_valid_monitor_port; +#endif + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); + + pr_info("Freescale / iVeia P1022 RDK reference board\n"); +} + +machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices); + +define_machine(p1022_rdk) { + .name = "P1022 RDK", + .compatible = "fsl,p1022rdk", + .setup_arch = p1022_rdk_setup_arch, + .init_IRQ = p1022_rdk_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c new file mode 100644 index 0000000000..e4fa8731fd --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1023_rdb.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc. + * + * Author: Roy Zang + * + * Description: + * P1023 RDB Board Setup + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "smp.h" + +#include +#include + +#include "mpc85xx.h" + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init p1023_rdb_setup_arch(void) +{ + struct device_node *np; + + if (ppc_md.progress) + ppc_md.progress("p1023_rdb_setup_arch()", 0); + + /* Map BCSR area */ + np = of_find_node_by_name(NULL, "bcsr"); + if (np != NULL) { + static u8 __iomem *bcsr_regs; + + bcsr_regs = of_iomap(np, 0); + of_node_put(np); + + if (!bcsr_regs) { + printk(KERN_ERR + "BCSR: Failed to map bcsr register space\n"); + return; + } else { +#define BCSR15_I2C_BUS0_SEG_CLR 0x07 +#define BCSR15_I2C_BUS0_SEG2 0x02 +/* + * Note: Accessing exclusively i2c devices. + * + * The i2c controller selects initially ID EEPROM in the u-boot; + * but if menu configuration selects RTC support in the kernel, + * the i2c controller switches to select RTC chip in the kernel. + */ +#ifdef CONFIG_RTC_CLASS + /* Enable RTC chip on the segment #2 of i2c */ + clrbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG_CLR); + setbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG2); +#endif + + iounmap(bcsr_regs); + } + } + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); +} + +machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices); + +static void __init p1023_rdb_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + + mpic_init(mpic); +} + +define_machine(p1023_rdb) { + .name = "P1023 RDB", + .compatible = "fsl,P1023RDB", + .setup_arch = p1023_rdb_setup_arch, + .init_IRQ = p1023_rdb_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif +}; diff --git a/arch/powerpc/platforms/85xx/p2020.c b/arch/powerpc/platforms/85xx/p2020.c new file mode 100644 index 0000000000..0e4d715145 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p2020.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale P2020 board Setup + * + * Copyright 2007,2009,2012-2013 Freescale Semiconductor Inc. + * Copyright 2022-2023 Pali Rohár + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "smp.h" +#include "mpc85xx.h" + +static void __init p2020_pic_init(void) +{ + struct mpic *mpic; + int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU; + + mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC "); + + if (WARN_ON(!mpic)) + return; + + mpic_init(mpic); + mpc85xx_8259_init(); +} + +/* + * Setup the architecture + */ +static void __init p2020_setup_arch(void) +{ + swiotlb_detect_4g(); + fsl_pci_assign_primary(); + uli_init(); + mpc85xx_smp_init(); + mpc85xx_qe_par_io_init(); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p2020_probe(void) +{ + struct device_node *p2020_cpu; + + /* + * There is no common compatible string for all P2020 boards. + * The only common thing is "PowerPC,P2020@0" cpu node. + * So check for P2020 board via this cpu node. + */ + p2020_cpu = of_find_node_by_path("/cpus/PowerPC,P2020@0"); + of_node_put(p2020_cpu); + + return !!p2020_cpu; +} + +machine_arch_initcall(p2020, mpc85xx_common_publish_devices); + +define_machine(p2020) { + .name = "Freescale P2020", + .probe = p2020_probe, + .setup_arch = p2020_setup_arch, + .init_IRQ = p2020_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c new file mode 100644 index 0000000000..acd19c52ad --- /dev/null +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ppa8548 setup and early boot code. + * + * Copyright 2009 Prodrive B.V.. + * + * By Stef van Os (see MAINTAINERS for contact information) + * + * Based on the SBC8548 support - Copyright 2007 Wind River Systems Inc. + * Based on the MPC8548CDS support - Copyright 2005 Freescale Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +static void __init ppa8548_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init ppa8548_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("ppa8548_setup_arch()", 0); +} + +static void ppa8548_show_cpuinfo(struct seq_file *m) +{ + uint32_t svid, phid1; + + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: Prodrive B.V.\n"); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .type = "soc", }, + { .compatible = "simple-bus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,srio", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(ppa8548, declare_of_platform_devices); + +define_machine(ppa8548) { + .name = "ppa8548", + .compatible = "ppa8548", + .setup_arch = ppa8548_setup_arch, + .init_IRQ = ppa8548_pic_init, + .show_cpuinfo = ppa8548_show_cpuinfo, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c new file mode 100644 index 0000000000..3cd2f3bd42 --- /dev/null +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Paravirt target for a generic QEMU e500 machine + * + * This is intended to be a flexible device-tree-driven platform, not fixed + * to a particular piece of hardware or a particular spec of virtual hardware, + * beyond the assumption of an e500-family CPU. Some things are still hardcoded + * here, such as MPIC, but this is a limitation of the current code rather than + * an interface contract with QEMU. + * + * Copyright 2012 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "smp.h" +#include "mpc85xx.h" + +static void __init qemu_e500_pic_init(void) +{ + struct mpic *mpic; + unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | + MPIC_ENABLE_COREINT; + + mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +static void __init qemu_e500_setup_arch(void) +{ + ppc_md.progress("qemu_e500_setup_arch()", 0); + + fsl_pci_assign_primary(); + swiotlb_detect_4g(); + mpc85xx_smp_init(); +} + +machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices); + +define_machine(qemu_e500) { + .name = "QEMU e500", + .compatible = "fsl,qemu-e500", + .setup_arch = qemu_e500_setup_arch, + .init_IRQ = qemu_e500_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_coreint_irq, + .progress = udbg_progress, + .power_save = e500_idle, +}; diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c new file mode 100644 index 0000000000..751395cbf0 --- /dev/null +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Servergy CTS-1000 Setup + * + * Maintained by Ben Collins + * + * Copyright 2012 by Servergy, Inc. + */ + +#define pr_fmt(fmt) "gpio-halt: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static struct gpio_desc *halt_gpio; +static int halt_irq; + +static const struct of_device_id child_match[] = { + { + .compatible = "sgy,gpio-halt", + }, + {}, +}; + +static void gpio_halt_wfn(struct work_struct *work) +{ + /* Likely wont return */ + orderly_poweroff(true); +} +static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn); + +static void __noreturn gpio_halt_cb(void) +{ + pr_info("triggering GPIO.\n"); + + /* Probably wont return */ + gpiod_set_value(halt_gpio, 1); + + panic("Halt failed\n"); +} + +/* This IRQ means someone pressed the power button and it is waiting for us + * to handle the shutdown/poweroff. */ +static irqreturn_t gpio_halt_irq(int irq, void *__data) +{ + struct platform_device *pdev = __data; + + dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n"); + schedule_work(&gpio_halt_wq); + + return IRQ_HANDLED; +}; + +static int __gpio_halt_probe(struct platform_device *pdev, + struct device_node *halt_node) +{ + int err; + + halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node), + NULL, 0, GPIOD_OUT_LOW, "gpio-halt"); + err = PTR_ERR_OR_ZERO(halt_gpio); + if (err) { + dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err); + return err; + } + + /* Now get the IRQ which tells us when the power button is hit */ + halt_irq = irq_of_parse_and_map(halt_node, 0); + err = request_irq(halt_irq, gpio_halt_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "gpio-halt", pdev); + if (err) { + dev_err(&pdev->dev, "failed to request IRQ %d: %d\n", + halt_irq, err); + gpiod_put(halt_gpio); + halt_gpio = NULL; + return err; + } + + /* Register our halt function */ + ppc_md.halt = gpio_halt_cb; + pm_power_off = gpio_halt_cb; + + dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq); + + return 0; +} + +static int gpio_halt_probe(struct platform_device *pdev) +{ + struct device_node *halt_node; + int ret; + + if (!pdev->dev.of_node) + return -ENODEV; + + /* If there's no matching child, this isn't really an error */ + halt_node = of_find_matching_node(pdev->dev.of_node, child_match); + if (!halt_node) + return -ENODEV; + + ret = __gpio_halt_probe(pdev, halt_node); + of_node_put(halt_node); + + return ret; +} + +static int gpio_halt_remove(struct platform_device *pdev) +{ + free_irq(halt_irq, pdev); + cancel_work_sync(&gpio_halt_wq); + + ppc_md.halt = NULL; + pm_power_off = NULL; + + gpiod_put(halt_gpio); + halt_gpio = NULL; + + return 0; +} + +static const struct of_device_id gpio_halt_match[] = { + /* We match on the gpio bus itself and scan the children since they + * wont be matched against us. We know the bus wont match until it + * has been registered too. */ + { + .compatible = "fsl,qoriq-gpio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, gpio_halt_match); + +static struct platform_driver gpio_halt_driver = { + .driver = { + .name = "gpio-halt", + .of_match_table = gpio_halt_match, + }, + .probe = gpio_halt_probe, + .remove = gpio_halt_remove, +}; + +module_platform_driver(gpio_halt_driver); + +MODULE_DESCRIPTION("Driver to support GPIO triggered system halt for Servergy CTS-1000 Systems."); +MODULE_VERSION("1.0"); +MODULE_AUTHOR("Ben Collins "); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c new file mode 100644 index 0000000000..40aa582068 --- /dev/null +++ b/arch/powerpc/platforms/85xx/smp.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Andy Fleming + * Kumar Gala + * + * Copyright 2006-2008, 2011-2012, 2015 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "smp.h" + +struct epapr_spin_table { + u32 addr_h; + u32 addr_l; + u32 r3_h; + u32 r3_l; + u32 reserved; + u32 pir; +}; + +static u64 timebase; +static int tb_req; +static int tb_valid; + +static void mpc85xx_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + hard_irq_disable(); + + while (!tb_req) + barrier(); + tb_req = 0; + + qoriq_pm_ops->freeze_time_base(true); +#ifdef CONFIG_PPC64 + /* + * e5500/e6500 have a workaround for erratum A-006958 in place + * that will reread the timebase until TBL is non-zero. + * That would be a bad thing when the timebase is frozen. + * + * Thus, we read it manually, and instead of checking that + * TBL is non-zero, we ensure that TB does not change. We don't + * do that for the main mftb implementation, because it requires + * a scratch register + */ + { + u64 prev; + + asm volatile("mfspr %0, %1" : "=r" (timebase) : + "i" (SPRN_TBRL)); + + do { + prev = timebase; + asm volatile("mfspr %0, %1" : "=r" (timebase) : + "i" (SPRN_TBRL)); + } while (prev != timebase); + } +#else + timebase = get_tb(); +#endif + mb(); + tb_valid = 1; + + while (tb_valid) + barrier(); + + qoriq_pm_ops->freeze_time_base(false); + + local_irq_restore(flags); +} + +static void mpc85xx_take_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + hard_irq_disable(); + + tb_req = 1; + while (!tb_valid) + barrier(); + + set_tb(timebase >> 32, timebase & 0xffffffff); + isync(); + tb_valid = 0; + + local_irq_restore(flags); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void smp_85xx_cpu_offline_self(void) +{ + unsigned int cpu = smp_processor_id(); + + local_irq_disable(); + hard_irq_disable(); + /* mask all irqs to prevent cpu wakeup */ + qoriq_pm_ops->irq_mask(cpu); + + idle_task_exit(); + + mtspr(SPRN_TCR, 0); + mtspr(SPRN_TSR, mfspr(SPRN_TSR)); + + generic_set_cpu_dead(cpu); + + cur_cpu_spec->cpu_down_flush(); + + qoriq_pm_ops->cpu_die(cpu); + + while (1) + ; +} + +static void qoriq_cpu_kill(unsigned int cpu) +{ + int i; + + for (i = 0; i < 500; i++) { + if (is_cpu_dead(cpu)) { +#ifdef CONFIG_PPC64 + paca_ptrs[cpu]->cpu_start = 0; +#endif + return; + } + msleep(20); + } + pr_err("CPU%d didn't die...\n", cpu); +} +#endif + +/* + * To keep it compatible with old boot program which uses + * cache-inhibit spin table, we need to flush the cache + * before accessing spin table to invalidate any staled data. + * We also need to flush the cache after writing to spin + * table to push data out. + */ +static inline void flush_spin_table(void *spin_table) +{ + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); +} + +static inline u32 read_spin_table_addr_l(void *spin_table) +{ + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); + return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); +} + +#ifdef CONFIG_PPC64 +static void wake_hw_thread(void *info) +{ + void fsl_secondary_thread_init(void); + unsigned long inia; + int cpu = *(const int *)info; + + inia = ppc_function_entry(fsl_secondary_thread_init); + book3e_start_thread(cpu_thread_in_core(cpu), inia); +} +#endif + +static int smp_85xx_start_cpu(int cpu) +{ + int ret = 0; + struct device_node *np; + const u64 *cpu_rel_addr; + unsigned long flags; + int ioremappable; + int hw_cpu = get_hard_smp_processor_id(cpu); + struct epapr_spin_table __iomem *spin_table; + + np = of_get_cpu_node(cpu, NULL); + cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL); + if (!cpu_rel_addr) { + pr_err("No cpu-release-addr for cpu %d\n", cpu); + return -ENOENT; + } + + /* + * A secondary core could be in a spinloop in the bootpage + * (0xfffff000), somewhere in highmem, or somewhere in lowmem. + * The bootpage and highmem can be accessed via ioremap(), but + * we need to directly access the spinloop if its in lowmem. + */ + ioremappable = *cpu_rel_addr > virt_to_phys(high_memory - 1); + + /* Map the spin table */ + if (ioremappable) + spin_table = ioremap_coherent(*cpu_rel_addr, + sizeof(struct epapr_spin_table)); + else + spin_table = phys_to_virt(*cpu_rel_addr); + + local_irq_save(flags); + hard_irq_disable(); + + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) + qoriq_pm_ops->cpu_up_prepare(cpu); + + /* if cpu is not spinning, reset it */ + if (read_spin_table_addr_l(spin_table) != 1) { + /* + * We don't set the BPTR register here since it already points + * to the boot page properly. + */ + mpic_reset_core(cpu); + + /* + * wait until core is ready... + * We need to invalidate the stale data, in case the boot + * loader uses a cache-inhibited spin table. + */ + if (!spin_event_timeout( + read_spin_table_addr_l(spin_table) == 1, + 10000, 100)) { + pr_err("timeout waiting for cpu %d to reset\n", + hw_cpu); + ret = -EAGAIN; + goto err; + } + } + + flush_spin_table(spin_table); + out_be32(&spin_table->pir, hw_cpu); +#ifdef CONFIG_PPC64 + out_be64((u64 *)(&spin_table->addr_h), + __pa(ppc_function_entry(generic_secondary_smp_init))); +#else +#ifdef CONFIG_PHYS_ADDR_T_64BIT + /* + * We need also to write addr_h to spin table for systems + * in which their physical memory start address was configured + * to above 4G, otherwise the secondary core can not get + * correct entry to start from. + */ + out_be32(&spin_table->addr_h, __pa(__early_start) >> 32); +#endif + out_be32(&spin_table->addr_l, __pa(__early_start)); +#endif + flush_spin_table(spin_table); +err: + local_irq_restore(flags); + + if (ioremappable) + iounmap(spin_table); + + return ret; +} + +static int smp_85xx_kick_cpu(int nr) +{ + int ret = 0; +#ifdef CONFIG_PPC64 + int primary = nr; +#endif + + WARN_ON(nr < 0 || nr >= num_possible_cpus()); + + pr_debug("kick CPU #%d\n", nr); + +#ifdef CONFIG_PPC64 + if (threads_per_core == 2) { + if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT))) + return -ENOENT; + + booting_thread_hwid = cpu_thread_in_core(nr); + primary = cpu_first_thread_sibling(nr); + + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) + qoriq_pm_ops->cpu_up_prepare(nr); + + /* + * If either thread in the core is online, use it to start + * the other. + */ + if (cpu_online(primary)) { + smp_call_function_single(primary, + wake_hw_thread, &nr, 1); + goto done; + } else if (cpu_online(primary + 1)) { + smp_call_function_single(primary + 1, + wake_hw_thread, &nr, 1); + goto done; + } + + /* + * If getting here, it means both threads in the core are + * offline. So start the primary thread, then it will start + * the thread specified in booting_thread_hwid, the one + * corresponding to nr. + */ + + } else if (threads_per_core == 1) { + /* + * If one core has only one thread, set booting_thread_hwid to + * an invalid value. + */ + booting_thread_hwid = INVALID_THREAD_HWID; + + } else if (threads_per_core > 2) { + pr_err("Do not support more than 2 threads per CPU."); + return -EINVAL; + } + + ret = smp_85xx_start_cpu(primary); + if (ret) + return ret; + +done: + paca_ptrs[nr]->cpu_start = 1; + generic_set_cpu_up(nr); + + return ret; +#else + ret = smp_85xx_start_cpu(nr); + if (ret) + return ret; + + generic_set_cpu_up(nr); + + return ret; +#endif +} + +struct smp_ops_t smp_85xx_ops = { + .cause_nmi_ipi = NULL, + .kick_cpu = smp_85xx_kick_cpu, + .cpu_bootable = smp_generic_cpu_bootable, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = generic_cpu_disable, + .cpu_die = generic_cpu_die, +#endif +#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64) + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +#endif +}; + +#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_PPC32 +atomic_t kexec_down_cpus = ATOMIC_INIT(0); + +static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) +{ + local_irq_disable(); + + if (secondary) { + cur_cpu_spec->cpu_down_flush(); + atomic_inc(&kexec_down_cpus); + /* loop forever */ + while (1); + } +} + +static void mpc85xx_smp_kexec_down(void *arg) +{ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0,1); +} +#else +static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) +{ + int cpu = smp_processor_id(); + int sibling = cpu_last_thread_sibling(cpu); + bool notified = false; + int disable_cpu; + int disable_threadbit = 0; + long start = mftb(); + long now; + + local_irq_disable(); + hard_irq_disable(); + mpic_teardown_this_cpu(secondary); + + if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) { + /* + * We enter the crash kernel on whatever cpu crashed, + * even if it's a secondary thread. If that's the case, + * disable the corresponding primary thread. + */ + disable_threadbit = 1; + disable_cpu = cpu_first_thread_sibling(cpu); + } else if (sibling != crashing_cpu && + cpu_thread_in_core(cpu) == 0 && + cpu_thread_in_core(sibling) != 0) { + disable_threadbit = 2; + disable_cpu = sibling; + } + + if (disable_threadbit) { + while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + now = mftb(); + if (!notified && now - start > 1000000) { + pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n", + __func__, smp_processor_id(), + disable_cpu, + paca_ptrs[disable_cpu]->kexec_state); + notified = true; + } + } + + if (notified) { + pr_info("%s: cpu %d done waiting\n", + __func__, disable_cpu); + } + + mtspr(SPRN_TENC, disable_threadbit); + while (mfspr(SPRN_TENSR) & disable_threadbit) + cpu_relax(); + } +} +#endif + +static void mpc85xx_smp_machine_kexec(struct kimage *image) +{ +#ifdef CONFIG_PPC32 + int timeout = INT_MAX; + int i, num_cpus = num_present_cpus(); + + if (image->type == KEXEC_TYPE_DEFAULT) + smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); + + while ( (atomic_read(&kexec_down_cpus) != (num_cpus - 1)) && + ( timeout > 0 ) ) + { + timeout--; + } + + if ( !timeout ) + printk(KERN_ERR "Unable to bring down secondary cpu(s)"); + + for_each_online_cpu(i) + { + if ( i == smp_processor_id() ) continue; + mpic_reset_core(i); + } +#endif + + default_machine_kexec(image); +} +#endif /* CONFIG_KEXEC_CORE */ + +static void smp_85xx_setup_cpu(int cpu_nr) +{ + mpic_setup_this_cpu(); +} + +void __init mpc85xx_smp_init(void) +{ + struct device_node *np; + + + np = of_find_node_by_type(NULL, "open-pic"); + if (np) { + smp_85xx_ops.probe = smp_mpic_probe; + smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu; + smp_85xx_ops.message_pass = smp_mpic_message_pass; + } else + smp_85xx_ops.setup_cpu = NULL; + + if (cpu_has_feature(CPU_FTR_DBELL)) { + /* + * If left NULL, .message_pass defaults to + * smp_muxed_ipi_message_pass + */ + smp_85xx_ops.message_pass = NULL; + smp_85xx_ops.cause_ipi = doorbell_global_ipi; + smp_85xx_ops.probe = NULL; + } + +#ifdef CONFIG_FSL_CORENET_RCPM + /* Assign a value to qoriq_pm_ops on PPC_E500MC */ + fsl_rcpm_init(); +#else + /* Assign a value to qoriq_pm_ops on !PPC_E500MC */ + mpc85xx_setup_pmc(); +#endif + if (qoriq_pm_ops) { + smp_85xx_ops.give_timebase = mpc85xx_give_timebase; + smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU + smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; + smp_85xx_ops.cpu_die = qoriq_cpu_kill; +#endif + } + smp_ops = &smp_85xx_ops; + +#ifdef CONFIG_KEXEC_CORE + ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down; + ppc_md.machine_kexec = mpc85xx_smp_machine_kexec; +#endif +} diff --git a/arch/powerpc/platforms/85xx/smp.h b/arch/powerpc/platforms/85xx/smp.h new file mode 100644 index 0000000000..3936ff6dfb --- /dev/null +++ b/arch/powerpc/platforms/85xx/smp.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef POWERPC_85XX_SMP_H_ +#define POWERPC_85XX_SMP_H_ 1 + +#include + +#ifdef CONFIG_SMP +void __init mpc85xx_smp_init(void); +int __init mpc85xx_setup_pmc(void); +#else +static inline void mpc85xx_smp_init(void) +{ + /* Nothing to do */ +} +#endif + +#endif /* not POWERPC_85XX_SMP_H_ */ diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c new file mode 100644 index 0000000000..403367b318 --- /dev/null +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2008 Emcraft Systems + * Sergei Poselenov + * + * Based on MPC8560 ADS and arch/ppc tqm85xx ports + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2008 Freescale Semiconductor Inc. + * + * Copyright (c) 2005-2006 DENX Software Engineering + * Stefan Roese + * + * Based on original work by + * Kumar Gala + * Copyright 2004 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" +#include "socrates_fpga_pic.h" + +static void __init socrates_pic_init(void) +{ + struct device_node *np; + + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + np = of_find_compatible_node(NULL, NULL, "abb,socrates-fpga-pic"); + if (!np) { + printk(KERN_ERR "Could not find socrates-fpga-pic node\n"); + return; + } + socrates_fpga_pic_init(np); + of_node_put(np); +} + +/* + * Setup the architecture + */ +static void __init socrates_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("socrates_setup_arch()", 0); + + fsl_pci_assign_primary(); +} + +machine_arch_initcall(socrates, mpc85xx_common_publish_devices); + +define_machine(socrates) { + .name = "Socrates", + .compatible = "abb,socrates", + .setup_arch = socrates_setup_arch, + .init_IRQ = socrates_pic_init, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c new file mode 100644 index 0000000000..baa12eff6d --- /dev/null +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2008 Ilya Yanok, Emcraft Systems + */ + +#include +#include +#include +#include + +/* + * The FPGA supports 9 interrupt sources, which can be routed to 3 + * interrupt request lines of the MPIC. The line to be used can be + * specified through the third cell of FDT property "interrupts". + */ + +#define SOCRATES_FPGA_NUM_IRQS 9 + +#define FPGA_PIC_IRQCFG (0x0) +#define FPGA_PIC_IRQMASK(n) (0x4 + 0x4 * (n)) + +#define SOCRATES_FPGA_IRQ_MASK ((1 << SOCRATES_FPGA_NUM_IRQS) - 1) + +struct socrates_fpga_irq_info { + unsigned int irq_line; + int type; +}; + +/* + * Interrupt routing and type table + * + * IRQ_TYPE_NONE means the interrupt type is configurable, + * otherwise it's fixed to the specified value. + */ +static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = { + [0] = {0, IRQ_TYPE_NONE}, + [1] = {0, IRQ_TYPE_LEVEL_HIGH}, + [2] = {0, IRQ_TYPE_LEVEL_LOW}, + [3] = {0, IRQ_TYPE_NONE}, + [4] = {0, IRQ_TYPE_NONE}, + [5] = {0, IRQ_TYPE_NONE}, + [6] = {0, IRQ_TYPE_NONE}, + [7] = {0, IRQ_TYPE_NONE}, + [8] = {0, IRQ_TYPE_LEVEL_HIGH}, +}; + +static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock); + +static void __iomem *socrates_fpga_pic_iobase; +static struct irq_domain *socrates_fpga_pic_irq_host; +static unsigned int socrates_fpga_irqs[3]; + +static inline uint32_t socrates_fpga_pic_read(int reg) +{ + return in_be32(socrates_fpga_pic_iobase + reg); +} + +static inline void socrates_fpga_pic_write(int reg, uint32_t val) +{ + out_be32(socrates_fpga_pic_iobase + reg, val); +} + +static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) +{ + uint32_t cause; + unsigned long flags; + int i; + + /* Check irq line routed to the MPIC */ + for (i = 0; i < 3; i++) { + if (irq == socrates_fpga_irqs[i]) + break; + } + if (i == 3) + return 0; + + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(i)); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); + for (i = SOCRATES_FPGA_NUM_IRQS - 1; i >= 0; i--) { + if (cause >> (i + 16)) + break; + } + return irq_linear_revmap(socrates_fpga_pic_irq_host, + (irq_hw_number_t)i); +} + +static void socrates_fpga_pic_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int irq = irq_desc_get_irq(desc); + unsigned int cascade_irq; + + /* + * See if we actually have an interrupt, call generic handling code if + * we do. + */ + cascade_irq = socrates_fpga_pic_get_irq(irq); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + chip->irq_eoi(&desc->irq_data); +} + +static void socrates_fpga_pic_ack(struct irq_data *d) +{ + unsigned long flags; + unsigned int irq_line, hwirq = irqd_to_hwirq(d); + uint32_t mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask |= (1 << (hwirq + 16)); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_mask(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask &= ~(1 << hwirq); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_mask_ack(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask &= ~(1 << hwirq); + mask |= (1 << (hwirq + 16)); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_unmask(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask |= (1 << hwirq); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static void socrates_fpga_pic_eoi(struct irq_data *d) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int irq_line; + u32 mask; + + irq_line = fpga_irqs[hwirq].irq_line; + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line)) + & SOCRATES_FPGA_IRQ_MASK; + mask |= (1 << (hwirq + 16)); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); +} + +static int socrates_fpga_pic_set_type(struct irq_data *d, + unsigned int flow_type) +{ + unsigned long flags; + unsigned int hwirq = irqd_to_hwirq(d); + int polarity; + u32 mask; + + if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE) + return -EINVAL; + + switch (flow_type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_LEVEL_HIGH: + polarity = 1; + break; + case IRQ_TYPE_LEVEL_LOW: + polarity = 0; + break; + default: + return -EINVAL; + } + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + mask = socrates_fpga_pic_read(FPGA_PIC_IRQCFG); + if (polarity) + mask |= (1 << hwirq); + else + mask &= ~(1 << hwirq); + socrates_fpga_pic_write(FPGA_PIC_IRQCFG, mask); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); + return 0; +} + +static struct irq_chip socrates_fpga_pic_chip = { + .name = "FPGA-PIC", + .irq_ack = socrates_fpga_pic_ack, + .irq_mask = socrates_fpga_pic_mask, + .irq_mask_ack = socrates_fpga_pic_mask_ack, + .irq_unmask = socrates_fpga_pic_unmask, + .irq_eoi = socrates_fpga_pic_eoi, + .irq_set_type = socrates_fpga_pic_set_type, +}; + +static int socrates_fpga_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + /* All interrupts are LEVEL sensitive */ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &socrates_fpga_pic_chip, + handle_fasteoi_irq); + + return 0; +} + +static int socrates_fpga_pic_host_xlate(struct irq_domain *h, + struct device_node *ct, const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + struct socrates_fpga_irq_info *fpga_irq = &fpga_irqs[intspec[0]]; + + *out_hwirq = intspec[0]; + if (fpga_irq->type == IRQ_TYPE_NONE) { + /* type is configurable */ + if (intspec[1] != IRQ_TYPE_LEVEL_LOW && + intspec[1] != IRQ_TYPE_LEVEL_HIGH) { + pr_warn("FPGA PIC: invalid irq type, setting default active low\n"); + *out_flags = IRQ_TYPE_LEVEL_LOW; + } else { + *out_flags = intspec[1]; + } + } else { + /* type is fixed */ + *out_flags = fpga_irq->type; + } + + /* Use specified interrupt routing */ + if (intspec[2] <= 2) + fpga_irq->irq_line = intspec[2]; + else + pr_warn("FPGA PIC: invalid irq routing\n"); + + return 0; +} + +static const struct irq_domain_ops socrates_fpga_pic_host_ops = { + .map = socrates_fpga_pic_host_map, + .xlate = socrates_fpga_pic_host_xlate, +}; + +void __init socrates_fpga_pic_init(struct device_node *pic) +{ + unsigned long flags; + int i; + + /* Setup an irq_domain structure */ + socrates_fpga_pic_irq_host = irq_domain_add_linear(pic, + SOCRATES_FPGA_NUM_IRQS, &socrates_fpga_pic_host_ops, NULL); + if (socrates_fpga_pic_irq_host == NULL) { + pr_err("FPGA PIC: Unable to allocate host\n"); + return; + } + + for (i = 0; i < 3; i++) { + socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i); + if (!socrates_fpga_irqs[i]) { + pr_warn("FPGA PIC: can't get irq%d\n", i); + continue; + } + irq_set_chained_handler(socrates_fpga_irqs[i], + socrates_fpga_pic_cascade); + } + + socrates_fpga_pic_iobase = of_iomap(pic, 0); + + raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(0), + SOCRATES_FPGA_IRQ_MASK << 16); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(1), + SOCRATES_FPGA_IRQ_MASK << 16); + socrates_fpga_pic_write(FPGA_PIC_IRQMASK(2), + SOCRATES_FPGA_IRQ_MASK << 16); + raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags); + + pr_info("FPGA PIC: Setting up Socrates FPGA PIC\n"); +} diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h new file mode 100644 index 0000000000..c50b23794a --- /dev/null +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2008 Ilya Yanok, Emcraft Systems + */ + +#ifndef SOCRATES_FPGA_PIC_H +#define SOCRATES_FPGA_PIC_H + +void __init socrates_fpga_pic_init(struct device_node *pic); + +#endif diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c new file mode 100644 index 0000000000..c10efc4589 --- /dev/null +++ b/arch/powerpc/platforms/85xx/stx_gp3.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Based on MPC8560 ADS and arch/ppc stx_gp3 ports + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2008 Freescale Semiconductor Inc. + * + * Dan Malek + * Copyright 2004 Embedded Edge, LLC + * + * Copied from mpc8560_ads.c + * Copyright 2002, 2003 Motorola Inc. + * + * Ported to 2.6, Matt Porter + * Copyright 2004-2005 MontaVista Software, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" + +#ifdef CONFIG_CPM2 +#include +#endif /* CONFIG_CPM2 */ + +static void __init stx_gp3_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +/* + * Setup the architecture + */ +static void __init stx_gp3_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("stx_gp3_setup_arch()", 0); + + fsl_pci_assign_primary(); + +#ifdef CONFIG_CPM2 + cpm2_reset(); +#endif +} + +static void stx_gp3_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: RPC Electronics STx\n"); + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices); + +define_machine(stx_gp3) { + .name = "STX GP3", + .compatible = "stx,gp3-8560", + .setup_arch = stx_gp3_setup_arch, + .init_IRQ = stx_gp3_pic_init, + .show_cpuinfo = stx_gp3_show_cpuinfo, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c new file mode 100644 index 0000000000..767eed98a0 --- /dev/null +++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * T1042 platform DIU operation + * + * Copyright 2014 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include + +/*DIU Pixel ClockCR offset in scfg*/ +#define CCSR_SCFG_PIXCLKCR 0x28 + +/* DIU Pixel Clock bits of the PIXCLKCR */ +#define PIXCLKCR_PXCKEN 0x80000000 +#define PIXCLKCR_PXCKINV 0x40000000 +#define PIXCLKCR_PXCKDLY 0x0000FF00 +#define PIXCLKCR_PXCLK_MASK 0x00FF0000 + +/* Some CPLD register definitions */ +#define CPLD_DIUCSR 0x16 +#define CPLD_DIUCSR_DVIEN 0x80 +#define CPLD_DIUCSR_BACKLIGHT 0x0f + +struct device_node *cpld_node; + +/** + * t1042rdb_set_monitor_port: switch the output to a different monitor port + */ +static void t1042rdb_set_monitor_port(enum fsl_diu_monitor_port port) +{ + void __iomem *cpld_base; + + cpld_base = of_iomap(cpld_node, 0); + if (!cpld_base) { + pr_err("%s: Could not map cpld registers\n", __func__); + goto exit; + } + + switch (port) { + case FSL_DIU_PORT_DVI: + /* Enable the DVI(HDMI) port, disable the DFP and + * the backlight + */ + clrbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_DVIEN); + break; + case FSL_DIU_PORT_LVDS: + /* + * LVDS also needs backlight enabled, otherwise the display + * will be blank. + */ + /* Enable the DFP port, disable the DVI*/ + setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 8); + setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 4); + setbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_BACKLIGHT); + break; + default: + pr_err("%s: Unsupported monitor port %i\n", __func__, port); + } + + iounmap(cpld_base); +exit: + of_node_put(cpld_node); +} + +/** + * t1042rdb_set_pixel_clock: program the DIU's clock + * @pixclock: pixel clock in ps (pico seconds) + */ +static void t1042rdb_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *scfg_np; + void __iomem *scfg; + unsigned long freq; + u64 temp; + u32 pxclk; + + scfg_np = of_find_compatible_node(NULL, NULL, "fsl,t1040-scfg"); + if (!scfg_np) { + pr_err("%s: Missing scfg node. Can not display video.\n", + __func__); + return; + } + + scfg = of_iomap(scfg_np, 0); + of_node_put(scfg_np); + if (!scfg) { + pr_err("%s: Could not map device. Can not display video.\n", + __func__); + return; + } + + /* Convert pixclock into frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. + * This number is programmed into the PIXCLKCR register, and the valid + * range of values is 2-255. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(scfg + CCSR_SCFG_PIXCLKCR, + PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16)); + + iounmap(scfg); +} + +/** + * t1042rdb_valid_monitor_port: set the monitor port for sysfs + */ +static enum fsl_diu_monitor_port +t1042rdb_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + switch (port) { + case FSL_DIU_PORT_DVI: + case FSL_DIU_PORT_LVDS: + return port; + default: + return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */ + } +} + +static int __init t1042rdb_diu_init(void) +{ + cpld_node = of_find_compatible_node(NULL, NULL, "fsl,t1042rdb-cpld"); + if (!cpld_node) + return 0; + + diu_ops.set_monitor_port = t1042rdb_set_monitor_port; + diu_ops.set_pixel_clock = t1042rdb_set_pixel_clock; + diu_ops.valid_monitor_port = t1042rdb_valid_monitor_port; + + return 0; +} + +early_initcall(t1042rdb_diu_init); + +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c new file mode 100644 index 0000000000..6be1b9809d --- /dev/null +++ b/arch/powerpc/platforms/85xx/tqm85xx.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Based on MPC8560 ADS and arch/ppc tqm85xx ports + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2008 Freescale Semiconductor Inc. + * + * Copyright (c) 2005-2006 DENX Software Engineering + * Stefan Roese + * + * Based on original work by + * Kumar Gala + * Copyright 2004 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc85xx.h" + +#ifdef CONFIG_CPM2 +#include +#endif /* CONFIG_CPM2 */ + +static void __init tqm85xx_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); + + mpc85xx_cpm2_pic_init(); +} + +/* + * Setup the architecture + */ +static void __init tqm85xx_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("tqm85xx_setup_arch()", 0); + +#ifdef CONFIG_CPM2 + cpm2_reset(); +#endif + + fsl_pci_assign_primary(); +} + +static void tqm85xx_show_cpuinfo(struct seq_file *m) +{ + uint pvid, svid, phid1; + + pvid = mfspr(SPRN_PVR); + svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: TQ Components\n"); + seq_printf(m, "PVR\t\t: 0x%x\n", pvid); + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + /* Display cpu Pll setting */ + phid1 = mfspr(SPRN_HID1); + seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); +} + +static void tqm85xx_ti1520_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + /* Do not do the fixup on other platforms! */ + if (!machine_is(tqm85xx)) + return; + + dev_info(&pdev->dev, "Using TI 1520 fixup on TQM85xx\n"); + + /* + * Enable P2CCLK bit in system control register + * to enable CLOCK output to power chip + */ + pci_read_config_dword(pdev, 0x80, &val); + pci_write_config_dword(pdev, 0x80, val | (1 << 27)); + +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520, + tqm85xx_ti1520_fixup); + +machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices); + +static const char * const board[] __initconst = { + "tqc,tqm8540", + "tqc,tqm8541", + "tqc,tqm8548", + "tqc,tqm8555", + "tqc,tqm8560", + NULL +}; + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init tqm85xx_probe(void) +{ + return of_device_compatible_match(of_root, board); +} + +define_machine(tqm85xx) { + .name = "TQM85xx", + .probe = tqm85xx_probe, + .setup_arch = tqm85xx_setup_arch, + .init_IRQ = tqm85xx_pic_init, + .show_cpuinfo = tqm85xx_show_cpuinfo, + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c new file mode 100644 index 0000000000..c0a0456f16 --- /dev/null +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc. + * + * Author: Michael Johnston + * + * Description: + * TWR-P102x Board Setup + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +static void __init twr_p1025_pic_init(void) +{ + struct mpic *mpic; + + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* ************************************************************************ + * + * Setup the architecture + * + */ +static void __init twr_p1025_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("twr_p1025_setup_arch()", 0); + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); + +#ifdef CONFIG_QUICC_ENGINE + mpc85xx_qe_par_io_init(); + +#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE) + if (machine_is(twr_p1025)) { + struct ccsr_guts __iomem *guts; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts"); + if (np) { + guts = of_iomap(np, 0); + if (!guts) + pr_err("twr_p1025: could not map global utilities register\n"); + else { + /* P1025 has pins muxed for QE and other functions. To + * enable QE UEC mode, we need to set bit QE0 for UCC1 + * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9 + * and QE12 for QE MII management signals in PMUXCR + * register. + * Set QE mux bits in PMUXCR */ + setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | + MPC85xx_PMUXCR_QE(3) | + MPC85xx_PMUXCR_QE(9) | + MPC85xx_PMUXCR_QE(12)); + iounmap(guts); + +#if IS_ENABLED(CONFIG_SERIAL_QE) + /* On P1025TWR board, the UCC7 acted as UART port. + * However, The UCC7's CTS pin is low level in default, + * it will impact the transmission in full duplex + * communication. So disable the Flow control pin PA18. + * The UCC7 UART just can use RXD and TXD pins. + */ + par_io_config_pin(0, 18, 0, 0, 0, 0); +#endif + /* Drive PB29 to CPLD low - CPLD will then change + * muxing from LBC to QE */ + par_io_config_pin(1, 29, 1, 0, 0, 0); + par_io_data_set(1, 29, 0); + } + of_node_put(np); + } + } +#endif +#endif /* CONFIG_QUICC_ENGINE */ + + pr_info("TWR-P1025 board from Freescale Semiconductor\n"); +} + +machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices); + +define_machine(twr_p1025) { + .name = "TWR-P1025", + .compatible = "fsl,TWR-P1025", + .setup_arch = twr_p1025_setup_arch, + .init_IRQ = twr_p1025_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c new file mode 100644 index 0000000000..45f257fc1a --- /dev/null +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2009 Extreme Engineering Solutions, Inc. + * + * X-ES board-specific functionality + * + * Based on mpc85xx_ds code from Freescale Semiconductor, Inc. + * + * Author: Nate Case + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +/* A few bit definitions needed for fixups on some boards */ +#define MPC85xx_L2CTL_L2E 0x80000000 /* L2 enable */ +#define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */ +#define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */ + +void __init xes_mpc85xx_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base) +{ + volatile uint32_t ctl, tmp; + + asm volatile("msync; isync"); + tmp = in_be32(l2_base); + + /* + * xMon may have enabled part of L2 as SRAM, so we need to set it + * up for all cache mode just to be safe. + */ + printk(KERN_INFO "xes_mpc85xx: Enabling L2 as cache\n"); + + ctl = MPC85xx_L2CTL_L2E | MPC85xx_L2CTL_L2I; + if (of_machine_is_compatible("MPC8540") || + of_machine_is_compatible("MPC8560")) + /* + * Assume L2 SRAM is used fully for cache, so set + * L2BLKSZ (bits 4:5) to match L2SIZ (bits 2:3). + */ + ctl |= (tmp & MPC85xx_L2CTL_L2SIZ_MASK) >> 2; + + asm volatile("msync; isync"); + out_be32(l2_base, ctl); + asm volatile("msync; isync"); +} + +static void __init xes_mpc85xx_fixups(void) +{ + struct device_node *np; + int err; + + /* + * Legacy xMon firmware on some X-ES boards does not enable L2 + * as cache. We must ensure that they get enabled here. + */ + for_each_node_by_name(np, "l2-cache-controller") { + struct resource r[2]; + void __iomem *l2_base; + + /* Only MPC8548, MPC8540, and MPC8560 boards are affected */ + if (!of_device_is_compatible(np, + "fsl,mpc8548-l2-cache-controller") && + !of_device_is_compatible(np, + "fsl,mpc8540-l2-cache-controller") && + !of_device_is_compatible(np, + "fsl,mpc8560-l2-cache-controller")) + continue; + + err = of_address_to_resource(np, 0, &r[0]); + if (err) { + printk(KERN_WARNING "xes_mpc85xx: Could not get " + "resource for device tree node '%pOF'", + np); + continue; + } + + l2_base = ioremap(r[0].start, resource_size(&r[0])); + + xes_mpc85xx_configure_l2(l2_base); + } +} + +/* + * Setup the architecture + */ +static void __init xes_mpc85xx_setup_arch(void) +{ + struct device_node *root; + const char *model = "Unknown"; + + root = of_find_node_by_path("/"); + if (root == NULL) + return; + + model = of_get_property(root, "model", NULL); + + printk(KERN_INFO "X-ES MPC85xx-based single-board computer: %s\n", + model + strlen("xes,")); + + xes_mpc85xx_fixups(); + + mpc85xx_smp_init(); + + fsl_pci_assign_primary(); +} + +machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices); + +define_machine(xes_mpc8572) { + .name = "X-ES MPC8572", + .compatible = "xes,MPC8572", + .setup_arch = xes_mpc85xx_setup_arch, + .init_IRQ = xes_mpc85xx_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(xes_mpc8548) { + .name = "X-ES MPC8548", + .compatible = "xes,MPC8548", + .setup_arch = xes_mpc85xx_setup_arch, + .init_IRQ = xes_mpc85xx_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; + +define_machine(xes_mpc8540) { + .name = "X-ES MPC8540", + .compatible = "xes,MPC8540", + .setup_arch = xes_mpc85xx_setup_arch, + .init_IRQ = xes_mpc85xx_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig new file mode 100644 index 0000000000..67467cd6f3 --- /dev/null +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 +menuconfig PPC_86xx + bool "86xx-based boards" + depends on PPC_BOOK3S_32 + select FSL_SOC + select ALTIVEC + help + The Freescale E600 SoCs have 74xx cores. + +if PPC_86xx + +config GEF_PPC9A + bool "GE PPC9A" + select DEFAULT_UIMAGE + select MMIO_NVRAM + select GPIOLIB + select GE_FPGA + help + This option enables support for the GE PPC9A. + +config GEF_SBC310 + bool "GE SBC310" + select DEFAULT_UIMAGE + select MMIO_NVRAM + select GPIOLIB + select GE_FPGA + help + This option enables support for the GE SBC310. + +config GEF_SBC610 + bool "GE SBC610" + select DEFAULT_UIMAGE + select MMIO_NVRAM + select GPIOLIB + select GE_FPGA + select HAVE_RAPIDIO + help + This option enables support for the GE SBC610. + +config MVME7100 + bool "Artesyn MVME7100" + help + This option enables support for the Emerson/Artesyn MVME7100 board. + +endif + +config MPC8641 + bool + select HAVE_PCI + select FSL_PCI if PCI + select PPC_UDBG_16550 + select MPIC + default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ + || MVME7100 + +config MPC8610 + bool + select HAVE_PCI + select FSL_PCI if PCI + select PPC_UDBG_16550 + select MPIC diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile new file mode 100644 index 0000000000..dafbc037ff --- /dev/null +++ b/arch/powerpc/platforms/86xx/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PowerPC 86xx linux kernel. +# + +obj-y := pic.o common.o +obj-$(CONFIG_SMP) += mpc86xx_smp.o +obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o +obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o +obj-$(CONFIG_GEF_PPC9A) += gef_ppc9a.o +obj-$(CONFIG_MVME7100) += mvme7100.o diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c new file mode 100644 index 0000000000..a4a5505276 --- /dev/null +++ b/arch/powerpc/platforms/86xx/common.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Routines common to most mpc86xx-based boards. + */ + +#include +#include +#include +#include +#include + +#include "mpc86xx.h" + +static const struct of_device_id mpc86xx_common_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .compatible = "simple-bus", }, + { .name = "localbus", }, + { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, + {}, +}; + +int __init mpc86xx_common_publish_devices(void) +{ + return of_platform_bus_probe(NULL, mpc86xx_common_ids, NULL); +} + +long __init mpc86xx_time_init(void) +{ + unsigned int temp; + + /* Set the time base to zero */ + mtspr(SPRN_TBWL, 0); + mtspr(SPRN_TBWU, 0); + + temp = mfspr(SPRN_HID0); + temp |= HID0_TBEN; + mtspr(SPRN_HID0, temp); + isync(); + + return 0; +} diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c new file mode 100644 index 0000000000..f7f98cca7b --- /dev/null +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GE PPC9A board support + * + * Author: Martyn Welch + * + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines) + * Copyright 2006 Freescale Semiconductor Inc. + * + * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG (fmt...) do { printk(KERN_ERR "PPC9A: " fmt); } while (0) +#else +#define DBG (fmt...) do { } while (0) +#endif + +void __iomem *ppc9a_regs; + +static void __init gef_ppc9a_init_irq(void) +{ + struct device_node *cascade_node = NULL; + + mpc86xx_init_irq(); + + /* + * There is a simple interrupt handler in the main FPGA, this needs + * to be cascaded into the MPIC + */ + cascade_node = of_find_compatible_node(NULL, NULL, "gef,fpga-pic-1.00"); + if (!cascade_node) { + printk(KERN_WARNING "PPC9A: No FPGA PIC\n"); + return; + } + + gef_pic_init(cascade_node); + of_node_put(cascade_node); +} + +static void __init gef_ppc9a_setup_arch(void) +{ + struct device_node *regs; + + printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n"); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + /* Remap basic board registers */ + regs = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs"); + if (regs) { + ppc9a_regs = of_iomap(regs, 0); + if (ppc9a_regs == NULL) + printk(KERN_WARNING "Unable to map board registers\n"); + of_node_put(regs); + } + +#if defined(CONFIG_MMIO_NVRAM) + mmio_nvram_init(); +#endif +} + +/* Return the PCB revision */ +static unsigned int gef_ppc9a_get_pcb_rev(void) +{ + unsigned int reg; + + reg = ioread32be(ppc9a_regs); + return (reg >> 16) & 0xff; +} + +/* Return the board (software) revision */ +static unsigned int gef_ppc9a_get_board_rev(void) +{ + unsigned int reg; + + reg = ioread32be(ppc9a_regs); + return (reg >> 8) & 0xff; +} + +/* Return the FPGA revision */ +static unsigned int gef_ppc9a_get_fpga_rev(void) +{ + unsigned int reg; + + reg = ioread32be(ppc9a_regs); + return reg & 0xf; +} + +/* Return VME Geographical Address */ +static unsigned int gef_ppc9a_get_vme_geo_addr(void) +{ + unsigned int reg; + + reg = ioread32be(ppc9a_regs + 0x4); + return reg & 0x1f; +} + +/* Return VME System Controller Status */ +static unsigned int gef_ppc9a_get_vme_is_syscon(void) +{ + unsigned int reg; + + reg = ioread32be(ppc9a_regs + 0x4); + return (reg >> 9) & 0x1; +} + +static void gef_ppc9a_show_cpuinfo(struct seq_file *m) +{ + uint svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n"); + + seq_printf(m, "Revision\t: %u%c\n", gef_ppc9a_get_pcb_rev(), + ('A' + gef_ppc9a_get_board_rev())); + seq_printf(m, "FPGA Revision\t: %u\n", gef_ppc9a_get_fpga_rev()); + + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + + seq_printf(m, "VME geo. addr\t: %u\n", gef_ppc9a_get_vme_geo_addr()); + + seq_printf(m, "VME syscon\t: %s\n", + gef_ppc9a_get_vme_is_syscon() ? "yes" : "no"); +} + +static void gef_ppc9a_nec_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + /* Do not do the fixup on other platforms! */ + if (!machine_is(gef_ppc9a)) + return; + + printk(KERN_INFO "Running NEC uPD720101 Fixup\n"); + + /* Ensure ports 1, 2, 3, 4 & 5 are enabled */ + pci_read_config_dword(pdev, 0xe0, &val); + pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x5); + + /* System clock is 48-MHz Oscillator and EHCI Enabled. */ + pci_write_config_dword(pdev, 0xe4, 1 << 5); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, + gef_ppc9a_nec_fixup); + +machine_arch_initcall(gef_ppc9a, mpc86xx_common_publish_devices); + +define_machine(gef_ppc9a) { + .name = "GE PPC9A", + .compatible = "gef,ppc9a", + .setup_arch = gef_ppc9a_setup_arch, + .init_IRQ = gef_ppc9a_init_irq, + .show_cpuinfo = gef_ppc9a_show_cpuinfo, + .get_irq = mpic_get_irq, + .time_init = mpc86xx_time_init, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c new file mode 100644 index 0000000000..689835f7f0 --- /dev/null +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GE SBC310 board support + * + * Author: Martyn Welch + * + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines) + * Copyright 2006 Freescale Semiconductor Inc. + * + * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG (fmt...) do { printk(KERN_ERR "SBC310: " fmt); } while (0) +#else +#define DBG (fmt...) do { } while (0) +#endif + +void __iomem *sbc310_regs; + +static void __init gef_sbc310_init_irq(void) +{ + struct device_node *cascade_node = NULL; + + mpc86xx_init_irq(); + + /* + * There is a simple interrupt handler in the main FPGA, this needs + * to be cascaded into the MPIC + */ + cascade_node = of_find_compatible_node(NULL, NULL, "gef,fpga-pic"); + if (!cascade_node) { + printk(KERN_WARNING "SBC310: No FPGA PIC\n"); + return; + } + + gef_pic_init(cascade_node); + of_node_put(cascade_node); +} + +static void __init gef_sbc310_setup_arch(void) +{ + struct device_node *regs; + printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n"); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + /* Remap basic board registers */ + regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs"); + if (regs) { + sbc310_regs = of_iomap(regs, 0); + if (sbc310_regs == NULL) + printk(KERN_WARNING "Unable to map board registers\n"); + of_node_put(regs); + } + +#if defined(CONFIG_MMIO_NVRAM) + mmio_nvram_init(); +#endif +} + +/* Return the PCB revision */ +static unsigned int gef_sbc310_get_board_id(void) +{ + unsigned int reg; + + reg = ioread32(sbc310_regs); + return reg & 0xff; +} + +/* Return the PCB revision */ +static unsigned int gef_sbc310_get_pcb_rev(void) +{ + unsigned int reg; + + reg = ioread32(sbc310_regs); + return (reg >> 8) & 0xff; +} + +/* Return the board (software) revision */ +static unsigned int gef_sbc310_get_board_rev(void) +{ + unsigned int reg; + + reg = ioread32(sbc310_regs); + return (reg >> 16) & 0xff; +} + +/* Return the FPGA revision */ +static unsigned int gef_sbc310_get_fpga_rev(void) +{ + unsigned int reg; + + reg = ioread32(sbc310_regs); + return (reg >> 24) & 0xf; +} + +static void gef_sbc310_show_cpuinfo(struct seq_file *m) +{ + uint svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n"); + + seq_printf(m, "Board ID\t: 0x%2.2x\n", gef_sbc310_get_board_id()); + seq_printf(m, "Revision\t: %u%c\n", gef_sbc310_get_pcb_rev(), + ('A' + gef_sbc310_get_board_rev() - 1)); + seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc310_get_fpga_rev()); + + seq_printf(m, "SVR\t\t: 0x%x\n", svid); + +} + +static void gef_sbc310_nec_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + /* Do not do the fixup on other platforms! */ + if (!machine_is(gef_sbc310)) + return; + + printk(KERN_INFO "Running NEC uPD720101 Fixup\n"); + + /* Ensure only ports 1 & 2 are enabled */ + pci_read_config_dword(pdev, 0xe0, &val); + pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x2); + + /* System clock is 48-MHz Oscillator and EHCI Enabled. */ + pci_write_config_dword(pdev, 0xe4, 1 << 5); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, + gef_sbc310_nec_fixup); + +machine_arch_initcall(gef_sbc310, mpc86xx_common_publish_devices); + +define_machine(gef_sbc310) { + .name = "GE SBC310", + .compatible = "gef,sbc310", + .setup_arch = gef_sbc310_setup_arch, + .init_IRQ = gef_sbc310_init_irq, + .show_cpuinfo = gef_sbc310_show_cpuinfo, + .get_irq = mpic_get_irq, + .time_init = mpc86xx_time_init, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c new file mode 100644 index 0000000000..365f511186 --- /dev/null +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GE SBC610 board support + * + * Author: Martyn Welch + * + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines) + * Copyright 2006 Freescale Semiconductor Inc. + * + * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "mpc86xx.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG (fmt...) do { printk(KERN_ERR "SBC610: " fmt); } while (0) +#else +#define DBG (fmt...) do { } while (0) +#endif + +void __iomem *sbc610_regs; + +static void __init gef_sbc610_init_irq(void) +{ + struct device_node *cascade_node = NULL; + + mpc86xx_init_irq(); + + /* + * There is a simple interrupt handler in the main FPGA, this needs + * to be cascaded into the MPIC + */ + cascade_node = of_find_compatible_node(NULL, NULL, "gef,fpga-pic"); + if (!cascade_node) { + printk(KERN_WARNING "SBC610: No FPGA PIC\n"); + return; + } + + gef_pic_init(cascade_node); + of_node_put(cascade_node); +} + +static void __init gef_sbc610_setup_arch(void) +{ + struct device_node *regs; + + printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n"); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + /* Remap basic board registers */ + regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs"); + if (regs) { + sbc610_regs = of_iomap(regs, 0); + if (sbc610_regs == NULL) + printk(KERN_WARNING "Unable to map board registers\n"); + of_node_put(regs); + } + +#if defined(CONFIG_MMIO_NVRAM) + mmio_nvram_init(); +#endif +} + +/* Return the PCB revision */ +static unsigned int gef_sbc610_get_pcb_rev(void) +{ + unsigned int reg; + + reg = ioread32(sbc610_regs); + return (reg >> 8) & 0xff; +} + +/* Return the board (software) revision */ +static unsigned int gef_sbc610_get_board_rev(void) +{ + unsigned int reg; + + reg = ioread32(sbc610_regs); + return (reg >> 16) & 0xff; +} + +/* Return the FPGA revision */ +static unsigned int gef_sbc610_get_fpga_rev(void) +{ + unsigned int reg; + + reg = ioread32(sbc610_regs); + return (reg >> 24) & 0xf; +} + +static void gef_sbc610_show_cpuinfo(struct seq_file *m) +{ + uint svid = mfspr(SPRN_SVR); + + seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n"); + + seq_printf(m, "Revision\t: %u%c\n", gef_sbc610_get_pcb_rev(), + ('A' + gef_sbc610_get_board_rev() - 1)); + seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc610_get_fpga_rev()); + + seq_printf(m, "SVR\t\t: 0x%x\n", svid); +} + +static void gef_sbc610_nec_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + /* Do not do the fixup on other platforms! */ + if (!machine_is(gef_sbc610)) + return; + + printk(KERN_INFO "Running NEC uPD720101 Fixup\n"); + + /* Ensure ports 1, 2, 3, 4 & 5 are enabled */ + pci_read_config_dword(pdev, 0xe0, &val); + pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x5); + + /* System clock is 48-MHz Oscillator and EHCI Enabled. */ + pci_write_config_dword(pdev, 0xe4, 1 << 5); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, + gef_sbc610_nec_fixup); + +machine_arch_initcall(gef_sbc610, mpc86xx_common_publish_devices); + +define_machine(gef_sbc610) { + .name = "GE SBC610", + .compatible = "gef,sbc610", + .setup_arch = gef_sbc610_setup_arch, + .init_IRQ = gef_sbc610_init_irq, + .show_cpuinfo = gef_sbc610_show_cpuinfo, + .get_irq = mpic_get_irq, + .time_init = mpc86xx_time_init, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/mpc86xx.h b/arch/powerpc/platforms/86xx/mpc86xx.h new file mode 100644 index 0000000000..61e52c757e --- /dev/null +++ b/arch/powerpc/platforms/86xx/mpc86xx.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2006 Freescale Semiconductor Inc. + */ + +#ifndef __MPC86XX_H__ +#define __MPC86XX_H__ + +/* + * Declaration for the various functions exported by the + * mpc86xx_* files. Mostly for use by mpc86xx_setup(). + */ + +extern void mpc86xx_smp_init(void); +extern void mpc86xx_init_irq(void); +extern long mpc86xx_time_init(void); +extern int mpc86xx_common_publish_devices(void); + +#endif /* __MPC86XX_H__ */ diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c new file mode 100644 index 0000000000..8a7e55acf0 --- /dev/null +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Xianghua Xiao + * Zhang Wei + * + * Copyright 2006 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc86xx.h" + +extern void __secondary_start_mpc86xx(void); + +#define MCM_PORT_CONFIG_OFFSET 0x10 + +/* Offset from CCSRBAR */ +#define MPC86xx_MCM_OFFSET (0x1000) +#define MPC86xx_MCM_SIZE (0x1000) + +static void __init +smp_86xx_release_core(int nr) +{ + __be32 __iomem *mcm_vaddr; + unsigned long pcr; + + if (nr < 0 || nr >= NR_CPUS) + return; + + /* + * Startup Core #nr. + */ + mcm_vaddr = ioremap(get_immrbase() + MPC86xx_MCM_OFFSET, + MPC86xx_MCM_SIZE); + pcr = in_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2)); + pcr |= 1 << (nr + 24); + out_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2), pcr); + + iounmap(mcm_vaddr); +} + + +static int __init +smp_86xx_kick_cpu(int nr) +{ + unsigned int save_vector; + unsigned long target, flags; + int n = 0; + unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100); + + if (nr < 0 || nr >= NR_CPUS) + return -ENOENT; + + pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr); + + local_irq_save(flags); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector to call __secondary_start_mpc86xx. */ + target = (unsigned long) __secondary_start_mpc86xx; + patch_branch(vector, target, BRANCH_SET_LINK); + + /* Kick that CPU */ + smp_86xx_release_core(nr); + + /* Wait a bit for the CPU to take the exception. */ + while ((__secondary_hold_acknowledge != nr) && (n++, n < 1000)) + mdelay(1); + + /* Restore the exception vector */ + patch_instruction(vector, ppc_inst(save_vector)); + + local_irq_restore(flags); + + pr_debug("wait CPU #%d for %d msecs.\n", nr, n); + + return 0; +} + + +static void __init +smp_86xx_setup_cpu(int cpu_nr) +{ + mpic_setup_this_cpu(); +} + + +struct smp_ops_t smp_86xx_ops = { + .cause_nmi_ipi = NULL, + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_86xx_kick_cpu, + .setup_cpu = smp_86xx_setup_cpu, + .take_timebase = smp_generic_take_timebase, + .give_timebase = smp_generic_give_timebase, +}; + + +void __init +mpc86xx_smp_init(void) +{ + smp_ops = &smp_86xx_ops; +} diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c new file mode 100644 index 0000000000..cee49ecd32 --- /dev/null +++ b/arch/powerpc/platforms/86xx/mvme7100.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Board setup routines for the Emerson/Artesyn MVME7100 + * + * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A. + * + * Author: Alessio Igor Bogani + * + * Based on earlier code by: + * + * Ajit Prem + * Copyright 2008 Emerson + * + * USB host fixup is borrowed by: + * + * Martyn Welch + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc86xx.h" + +#define MVME7100_INTERRUPT_REG_2_OFFSET 0x05 +#define MVME7100_DS1375_MASK 0x40 +#define MVME7100_MAX6649_MASK 0x20 +#define MVME7100_ABORT_MASK 0x10 + +/* + * Setup the architecture + */ +static void __init mvme7100_setup_arch(void) +{ + struct device_node *bcsr_node; + void __iomem *mvme7100_regs = NULL; + u8 reg; + + if (ppc_md.progress) + ppc_md.progress("mvme7100_setup_arch()", 0); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + /* Remap BCSR registers */ + bcsr_node = of_find_compatible_node(NULL, NULL, + "artesyn,mvme7100-bcsr"); + if (bcsr_node) { + mvme7100_regs = of_iomap(bcsr_node, 0); + of_node_put(bcsr_node); + } + + if (mvme7100_regs) { + /* Disable ds1375, max6649, and abort interrupts */ + reg = readb(mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET); + reg |= MVME7100_DS1375_MASK | MVME7100_MAX6649_MASK + | MVME7100_ABORT_MASK; + writeb(reg, mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET); + } else + pr_warn("Unable to map board registers\n"); + + pr_info("MVME7100 board from Artesyn\n"); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mvme7100_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "artesyn,MVME7100"); +} + +static void mvme7100_usb_host_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + if (!machine_is(mvme7100)) + return; + + /* Ensure only ports 1 & 2 are enabled */ + pci_read_config_dword(pdev, 0xe0, &val); + pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x2); + + /* System clock is 48-MHz Oscillator and EHCI Enabled. */ + pci_write_config_dword(pdev, 0xe4, 1 << 5); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, + mvme7100_usb_host_fixup); + +machine_arch_initcall(mvme7100, mpc86xx_common_publish_devices); + +define_machine(mvme7100) { + .name = "MVME7100", + .probe = mvme7100_probe, + .setup_arch = mvme7100_setup_arch, + .init_IRQ = mpc86xx_init_irq, + .get_irq = mpic_get_irq, + .time_init = mpc86xx_time_init, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c new file mode 100644 index 0000000000..9ca36de235 --- /dev/null +++ b/arch/powerpc/platforms/86xx/pic.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "mpc86xx.h" + +#ifdef CONFIG_PPC_I8259 +static void mpc86xx_8259_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} +#endif /* CONFIG_PPC_I8259 */ + +void __init mpc86xx_init_irq(void) +{ +#ifdef CONFIG_PPC_I8259 + struct device_node *np; + struct device_node *cascade_node = NULL; + int cascade_irq; +#endif + + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " MPIC "); + BUG_ON(mpic == NULL); + + mpic_init(mpic); + +#ifdef CONFIG_PPC_I8259 + /* Initialize i8259 controller */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "chrp,iic")) { + cascade_node = np; + break; + } + + if (cascade_node == NULL) { + printk(KERN_DEBUG "Could not find i8259 PIC\n"); + return; + } + + cascade_irq = irq_of_parse_and_map(cascade_node, 0); + if (!cascade_irq) { + printk(KERN_ERR "Failed to map cascade interrupt\n"); + return; + } + + i8259_init(cascade_node, 0); + of_node_put(cascade_node); + + irq_set_chained_handler(cascade_irq, mpc86xx_8259_cascade); +#endif +} diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig new file mode 100644 index 0000000000..a14d9d8997 --- /dev/null +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -0,0 +1,200 @@ +# SPDX-License-Identifier: GPL-2.0 +config CPM1 + bool + select CPM + +choice + prompt "8xx Machine Type" + depends on PPC_8xx + default MPC885ADS + +config MPC8XXFADS + bool "FADS" + +config MPC86XADS + bool "MPC86XADS" + select CPM1 + help + MPC86x Application Development System by Freescale Semiconductor. + The MPC86xADS is meant to serve as a platform for s/w and h/w + development around the MPC86X processor families. + +config MPC885ADS + bool "MPC885ADS" + select CPM1 + select OF_DYNAMIC + help + Freescale Semiconductor MPC885 Application Development System (ADS). + Also known as DUET. + The MPC885ADS is meant to serve as a platform for s/w and h/w + development around the MPC885 processor family. + +config PPC_EP88XC + bool "Embedded Planet EP88xC (a.k.a. CWH-PPC-885XN-VE)" + select CPM1 + help + This enables support for the Embedded Planet EP88xC board. + + This board is also resold by Freescale as the QUICCStart + MPC885 Evaluation System and/or the CWH-PPC-885XN-VE. + +config PPC_ADDER875 + bool "Analogue & Micro Adder 875" + select CPM1 + help + This enables support for the Analogue & Micro Adder 875 + board. + +config TQM8XX + bool "TQM8XX" + select CPM1 + help + support for the mpc8xx based boards from TQM. + +endchoice + +menu "Freescale Ethernet driver platform-specific options" + depends on (FS_ENET && MPC885ADS) + + config MPC8xx_SECOND_ETH + bool "Second Ethernet channel" + depends on MPC885ADS + default y + help + This enables support for second Ethernet on MPC885ADS and MPC86xADS boards. + The latter will use SCC1, for 885ADS you can select it below. + + choice + prompt "Second Ethernet channel" + depends on MPC8xx_SECOND_ETH + default MPC8xx_SECOND_ETH_FEC2 + + config MPC8xx_SECOND_ETH_FEC2 + bool "FEC2" + depends on MPC885ADS + help + Enable FEC2 to serve as 2-nd Ethernet channel. Note that SMC2 + (often 2-nd UART) will not work if this is enabled. + + config MPC8xx_SECOND_ETH_SCC3 + bool "SCC3" + depends on MPC885ADS + help + Enable SCC3 to serve as 2-nd Ethernet channel. Note that SMC1 + (often 1-nd UART) will not work if this is enabled. + + endchoice + +endmenu + +# +# MPC8xx Communication options +# + +menu "MPC8xx CPM Options" + depends on PPC_8xx + +# This doesn't really belong here, but it is convenient to ask +# 8xx specific questions. +comment "Generic MPC8xx Options" + +config 8xx_GPIO + bool "GPIO API Support" + select GPIOLIB + select OF_GPIO_MM_GPIOCHIP + help + Saying Y here will cause the ports on an MPC8xx processor to be used + with the GPIO API. If you say N here, the kernel needs less memory. + + If in doubt, say Y here. + +config 8xx_CPU15 + bool "CPU15 Silicon Errata" + depends on !HUGETLB_PAGE + default y + help + This enables a workaround for erratum CPU15 on MPC8xx chips. + This bug can cause incorrect code execution under certain + circumstances. This workaround adds some overhead (a TLB miss + every time execution crosses a page boundary), and you may wish + to disable it if you have worked around the bug in the compiler + (by not placing conditional branches or branches to LR or CTR + in the last word of a page, with a target of the last cache + line in the next page), or if you have used some other + workaround. + + If in doubt, say Y here. + +choice + prompt "Microcode patch selection" + default NO_UCODE_PATCH + help + Help not implemented yet, coming soon. + +config NO_UCODE_PATCH + bool "None" + +config USB_SOF_UCODE_PATCH + bool "USB SOF patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_UCODE_PATCH + bool "I2C/SPI relocation patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_SMC1_UCODE_PATCH + bool "I2C/SPI/SMC1 relocation patch" + help + Help not implemented yet, coming soon. + +config SMC_UCODE_PATCH + bool "SMC relocation patch" + help + This microcode relocates SMC1 and SMC2 parameter RAMs at + offset 0x1ec0 and 0x1fc0 to allow extended parameter RAM + for SCC3 and SCC4. + +endchoice + +config UCODE_PATCH + bool + default y + depends on !NO_UCODE_PATCH + +menu "8xx advanced setup" + depends on PPC_8xx + +config PIN_TLB + bool "Pinned Kernel TLBs" + depends on ADVANCED_OPTIONS + help + On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each + table 4 TLBs can be pinned. + + It reduces the amount of usable TLBs to 28 (ie by 12%). That's the + reason why we make it selectable. + + This option does nothing, it just activate the selection of what + to pin. + +config PIN_TLB_DATA + bool "Pinned TLB for DATA" + depends on PIN_TLB + default y + help + This pins the first 32 Mbytes of memory with 8M pages. + +config PIN_TLB_IMMR + bool "Pinned TLB for IMMR" + depends on PIN_TLB + default y + help + This pins the IMMR area with a 512kbytes page. In case + CONFIG_PIN_TLB_DATA is also selected, it will reduce + CONFIG_PIN_TLB_DATA to 24 Mbytes. + +endmenu + +endmenu diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile new file mode 100644 index 0000000000..5a098f7d5d --- /dev/null +++ b/arch/powerpc/platforms/8xx/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PowerPC 8xx linux kernel. +# +obj-y += m8xx_setup.o machine_check.o pic.o +obj-$(CONFIG_CPM1) += cpm1.o cpm1-ic.o +obj-$(CONFIG_UCODE_PATCH) += micropatch.o +obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o +obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o +obj-$(CONFIG_PPC_EP88XC) += ep88xc.o +obj-$(CONFIG_PPC_ADDER875) += adder875.o +obj-$(CONFIG_TQM8XX) += tqm8xx_setup.o diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c new file mode 100644 index 0000000000..d02f8dd664 --- /dev/null +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Analogue & Micro Adder MPC875 board support + * + * Author: Scott Wood + * + * Copyright (c) 2007 Freescale Semiconductor, Inc. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "mpc8xx.h" +#include "pic.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static __initdata struct cpm_pin adder875_pins[] = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* MII1 */ + {CPM_PORTA, 0, CPM_PIN_INPUT}, + {CPM_PORTA, 1, CPM_PIN_INPUT}, + {CPM_PORTA, 2, CPM_PIN_INPUT}, + {CPM_PORTA, 3, CPM_PIN_INPUT}, + {CPM_PORTA, 4, CPM_PIN_OUTPUT}, + {CPM_PORTA, 10, CPM_PIN_OUTPUT}, + {CPM_PORTA, 11, CPM_PIN_OUTPUT}, + {CPM_PORTB, 19, CPM_PIN_INPUT}, + {CPM_PORTB, 31, CPM_PIN_INPUT}, + {CPM_PORTC, 12, CPM_PIN_INPUT}, + {CPM_PORTC, 13, CPM_PIN_INPUT}, + {CPM_PORTE, 30, CPM_PIN_OUTPUT}, + {CPM_PORTE, 31, CPM_PIN_OUTPUT}, + + /* MII2 */ + {CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 16, CPM_PIN_OUTPUT}, + {CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 21, CPM_PIN_OUTPUT}, + {CPM_PORTE, 22, CPM_PIN_OUTPUT}, + {CPM_PORTE, 23, CPM_PIN_OUTPUT}, + {CPM_PORTE, 24, CPM_PIN_OUTPUT}, + {CPM_PORTE, 25, CPM_PIN_OUTPUT}, + {CPM_PORTE, 26, CPM_PIN_OUTPUT}, + {CPM_PORTE, 27, CPM_PIN_OUTPUT}, + {CPM_PORTE, 28, CPM_PIN_OUTPUT}, + {CPM_PORTE, 29, CPM_PIN_OUTPUT}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adder875_pins); i++) { + const struct cpm_pin *pin = &adder875_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + + /* Set FEC1 and FEC2 to MII mode */ + clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180); +} + +static void __init adder875_setup(void) +{ + cpm_reset(); + init_ioports(); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .compatible = "simple-bus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} +machine_device_initcall(adder875, declare_of_platform_devices); + +define_machine(adder875) { + .name = "Adder MPC875", + .compatible = "analogue-and-micro,adder875", + .setup_arch = adder875_setup, + .init_IRQ = mpc8xx_pic_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c new file mode 100644 index 0000000000..a18fc7c99f --- /dev/null +++ b/arch/powerpc/platforms/8xx/cpm1-ic.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Interrupt controller for the + * Communication Processor Module. + * Copyright (c) 1997 Dan error_act (dmalek@jlc.net) + */ +#include +#include +#include +#include +#include + +struct cpm_pic_data { + cpic8xx_t __iomem *reg; + struct irq_domain *host; +}; + +static void cpm_mask_irq(struct irq_data *d) +{ + struct cpm_pic_data *data = irq_data_get_irq_chip_data(d); + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); + + clrbits32(&data->reg->cpic_cimr, (1 << cpm_vec)); +} + +static void cpm_unmask_irq(struct irq_data *d) +{ + struct cpm_pic_data *data = irq_data_get_irq_chip_data(d); + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); + + setbits32(&data->reg->cpic_cimr, (1 << cpm_vec)); +} + +static void cpm_end_irq(struct irq_data *d) +{ + struct cpm_pic_data *data = irq_data_get_irq_chip_data(d); + unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d); + + out_be32(&data->reg->cpic_cisr, (1 << cpm_vec)); +} + +static struct irq_chip cpm_pic = { + .name = "CPM PIC", + .irq_mask = cpm_mask_irq, + .irq_unmask = cpm_unmask_irq, + .irq_eoi = cpm_end_irq, +}; + +static int cpm_get_irq(struct irq_desc *desc) +{ + struct cpm_pic_data *data = irq_desc_get_handler_data(desc); + int cpm_vec; + + /* + * Get the vector by setting the ACK bit and then reading + * the register. + */ + out_be16(&data->reg->cpic_civr, 1); + cpm_vec = in_be16(&data->reg->cpic_civr); + cpm_vec >>= 11; + + return irq_linear_revmap(data->host, cpm_vec); +} + +static void cpm_cascade(struct irq_desc *desc) +{ + generic_handle_irq(cpm_get_irq(desc)); +} + +static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &cpm_pic, handle_fasteoi_irq); + return 0; +} + +static const struct irq_domain_ops cpm_pic_host_ops = { + .map = cpm_pic_host_map, +}; + +static int cpm_pic_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + int irq; + struct cpm_pic_data *data; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->reg = devm_ioremap(dev, res->start, resource_size(res)); + if (!data->reg) + return -ENODEV; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + /* Initialize the CPM interrupt controller. */ + out_be32(&data->reg->cpic_cicr, + (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) | + ((virq_to_hw(irq) / 2) << 13) | CICR_HP_MASK); + + out_be32(&data->reg->cpic_cimr, 0); + + data->host = irq_domain_add_linear(dev->of_node, 64, &cpm_pic_host_ops, data); + if (!data->host) + return -ENODEV; + + irq_set_handler_data(irq, data); + irq_set_chained_handler(irq, cpm_cascade); + + setbits32(&data->reg->cpic_cicr, CICR_IEN); + + return 0; +} + +static const struct of_device_id cpm_pic_match[] = { + { + .compatible = "fsl,cpm1-pic", + }, { + .type = "cpm-pic", + .compatible = "CPM", + }, {}, +}; + +static struct platform_driver cpm_pic_driver = { + .driver = { + .name = "cpm-pic", + .of_match_table = cpm_pic_match, + }, + .probe = cpm_pic_probe, +}; + +static int __init cpm_pic_init(void) +{ + return platform_driver_register(&cpm_pic_driver); +} +arch_initcall(cpm_pic_init); + +/* + * The CPM can generate the error interrupt when there is a race condition + * between generating and masking interrupts. All we have to do is ACK it + * and return. This is a no-op function so we don't need any special + * tests in the interrupt handler. + */ +static irqreturn_t cpm_error_interrupt(int irq, void *dev) +{ + return IRQ_HANDLED; +} + +static int cpm_error_probe(struct platform_device *pdev) +{ + int irq; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + return request_irq(irq, cpm_error_interrupt, IRQF_NO_THREAD, "error", NULL); +} + +static const struct of_device_id cpm_error_ids[] = { + { .compatible = "fsl,cpm1" }, + { .type = "cpm" }, + {}, +}; + +static struct platform_driver cpm_error_driver = { + .driver = { + .name = "cpm-error", + .of_match_table = cpm_error_ids, + }, + .probe = cpm_error_probe, +}; + +static int __init cpm_error_init(void) +{ + return platform_driver_register(&cpm_error_driver); +} +subsys_initcall(cpm_error_init); diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c new file mode 100644 index 0000000000..ebb5f6a27d --- /dev/null +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -0,0 +1,636 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * General Purpose functions for the global management of the + * Communication Processor Module. + * Copyright (c) 1997 Dan error_act (dmalek@jlc.net) + * + * In addition to the individual control of the communication + * channels, there are a few functions that globally affect the + * communication processor. + * + * Buffer descriptors must be allocated from the dual ported memory + * space. The allocator for that is here. When the communication + * process is reset, we reclaim the memory available. There is + * currently no deallocator for this memory. + * The amount of space available is platform dependent. On the + * MBX, the EPPC software loads additional microcode into the + * communication processor, and uses some of the DP ram for this + * purpose. Current, the first 512 bytes and the last 256 bytes of + * memory are used. Right now I am conservative and only use the + * memory that can never be used for microcode. If there are + * applications that require more DP ram, we can expand the boundaries + * but then we have to be careful of any downloaded microcode. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_8xx_GPIO +#include +#endif + +#define CPM_MAP_SIZE (0x4000) + +cpm8xx_t __iomem *cpmp; /* Pointer to comm processor space */ +immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE; + +void __init cpm_reset(void) +{ + cpmp = &mpc8xx_immr->im_cpm; + +#ifndef CONFIG_PPC_EARLY_DEBUG_CPM + /* Perform a reset. */ + out_be16(&cpmp->cp_cpcr, CPM_CR_RST | CPM_CR_FLG); + + /* Wait for it. */ + while (in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG); +#endif + +#ifdef CONFIG_UCODE_PATCH + cpm_load_patch(cpmp); +#endif + + /* + * Set SDMA Bus Request priority 5. + * On 860T, this also enables FEC priority 6. I am not sure + * this is what we really want for some applications, but the + * manual recommends it. + * Bit 25, FAM can also be set to use FEC aggressive mode (860T). + */ + if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */ + out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40); + else + out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1); +} + +static DEFINE_SPINLOCK(cmd_lock); + +#define MAX_CR_CMD_LOOPS 10000 + +int cpm_command(u32 command, u8 opcode) +{ + int i, ret; + unsigned long flags; + + if (command & 0xffffff03) + return -EINVAL; + + spin_lock_irqsave(&cmd_lock, flags); + + ret = 0; + out_be16(&cpmp->cp_cpcr, command | CPM_CR_FLG | (opcode << 8)); + for (i = 0; i < MAX_CR_CMD_LOOPS; i++) + if ((in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG) == 0) + goto out; + + printk(KERN_ERR "%s(): Not able to issue CPM command\n", __func__); + ret = -EIO; +out: + spin_unlock_irqrestore(&cmd_lock, flags); + return ret; +} +EXPORT_SYMBOL(cpm_command); + +/* + * Set a baud rate generator. This needs lots of work. There are + * four BRGs, any of which can be wired to any channel. + * The internal baud rate clock is the system clock divided by 16. + * This assumes the baudrate is 16x oversampled by the uart. + */ +#define BRG_INT_CLK (get_brgfreq()) +#define BRG_UART_CLK (BRG_INT_CLK/16) +#define BRG_UART_CLK_DIV16 (BRG_UART_CLK/16) + +void +cpm_setbrg(uint brg, uint rate) +{ + u32 __iomem *bp; + + /* This is good enough to get SMCs running..... */ + bp = &cpmp->cp_brgc1; + bp += brg; + /* + * The BRG has a 12-bit counter. For really slow baud rates (or + * really fast processors), we may have to further divide by 16. + */ + if (((BRG_UART_CLK / rate) - 1) < 4096) + out_be32(bp, (((BRG_UART_CLK / rate) - 1) << 1) | CPM_BRG_EN); + else + out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) | + CPM_BRG_EN | CPM_BRG_DIV16); +} +EXPORT_SYMBOL(cpm_setbrg); + +struct cpm_ioport16 { + __be16 dir, par, odr_sor, dat, intr; + __be16 res[3]; +}; + +struct cpm_ioport32b { + __be32 dir, par, odr, dat; +}; + +struct cpm_ioport32e { + __be32 dir, par, sor, odr, dat; +}; + +static void __init cpm1_set_pin32(int port, int pin, int flags) +{ + struct cpm_ioport32e __iomem *iop; + pin = 1 << (31 - pin); + + if (port == CPM_PORTB) + iop = (struct cpm_ioport32e __iomem *) + &mpc8xx_immr->im_cpm.cp_pbdir; + else + iop = (struct cpm_ioport32e __iomem *) + &mpc8xx_immr->im_cpm.cp_pedir; + + if (flags & CPM_PIN_OUTPUT) + setbits32(&iop->dir, pin); + else + clrbits32(&iop->dir, pin); + + if (!(flags & CPM_PIN_GPIO)) + setbits32(&iop->par, pin); + else + clrbits32(&iop->par, pin); + + if (port == CPM_PORTB) { + if (flags & CPM_PIN_OPENDRAIN) + setbits16(&mpc8xx_immr->im_cpm.cp_pbodr, pin); + else + clrbits16(&mpc8xx_immr->im_cpm.cp_pbodr, pin); + } + + if (port == CPM_PORTE) { + if (flags & CPM_PIN_SECONDARY) + setbits32(&iop->sor, pin); + else + clrbits32(&iop->sor, pin); + + if (flags & CPM_PIN_OPENDRAIN) + setbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin); + else + clrbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin); + } +} + +static void __init cpm1_set_pin16(int port, int pin, int flags) +{ + struct cpm_ioport16 __iomem *iop = + (struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport; + + pin = 1 << (15 - pin); + + if (port != 0) + iop += port - 1; + + if (flags & CPM_PIN_OUTPUT) + setbits16(&iop->dir, pin); + else + clrbits16(&iop->dir, pin); + + if (!(flags & CPM_PIN_GPIO)) + setbits16(&iop->par, pin); + else + clrbits16(&iop->par, pin); + + if (port == CPM_PORTA) { + if (flags & CPM_PIN_OPENDRAIN) + setbits16(&iop->odr_sor, pin); + else + clrbits16(&iop->odr_sor, pin); + } + if (port == CPM_PORTC) { + if (flags & CPM_PIN_SECONDARY) + setbits16(&iop->odr_sor, pin); + else + clrbits16(&iop->odr_sor, pin); + if (flags & CPM_PIN_FALLEDGE) + setbits16(&iop->intr, pin); + else + clrbits16(&iop->intr, pin); + } +} + +void __init cpm1_set_pin(enum cpm_port port, int pin, int flags) +{ + if (port == CPM_PORTB || port == CPM_PORTE) + cpm1_set_pin32(port, pin, flags); + else + cpm1_set_pin16(port, pin, flags); +} + +int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode) +{ + int shift; + int i, bits = 0; + u32 __iomem *reg; + u32 mask = 7; + + u8 clk_map[][3] = { + {CPM_CLK_SCC1, CPM_BRG1, 0}, + {CPM_CLK_SCC1, CPM_BRG2, 1}, + {CPM_CLK_SCC1, CPM_BRG3, 2}, + {CPM_CLK_SCC1, CPM_BRG4, 3}, + {CPM_CLK_SCC1, CPM_CLK1, 4}, + {CPM_CLK_SCC1, CPM_CLK2, 5}, + {CPM_CLK_SCC1, CPM_CLK3, 6}, + {CPM_CLK_SCC1, CPM_CLK4, 7}, + + {CPM_CLK_SCC2, CPM_BRG1, 0}, + {CPM_CLK_SCC2, CPM_BRG2, 1}, + {CPM_CLK_SCC2, CPM_BRG3, 2}, + {CPM_CLK_SCC2, CPM_BRG4, 3}, + {CPM_CLK_SCC2, CPM_CLK1, 4}, + {CPM_CLK_SCC2, CPM_CLK2, 5}, + {CPM_CLK_SCC2, CPM_CLK3, 6}, + {CPM_CLK_SCC2, CPM_CLK4, 7}, + + {CPM_CLK_SCC3, CPM_BRG1, 0}, + {CPM_CLK_SCC3, CPM_BRG2, 1}, + {CPM_CLK_SCC3, CPM_BRG3, 2}, + {CPM_CLK_SCC3, CPM_BRG4, 3}, + {CPM_CLK_SCC3, CPM_CLK5, 4}, + {CPM_CLK_SCC3, CPM_CLK6, 5}, + {CPM_CLK_SCC3, CPM_CLK7, 6}, + {CPM_CLK_SCC3, CPM_CLK8, 7}, + + {CPM_CLK_SCC4, CPM_BRG1, 0}, + {CPM_CLK_SCC4, CPM_BRG2, 1}, + {CPM_CLK_SCC4, CPM_BRG3, 2}, + {CPM_CLK_SCC4, CPM_BRG4, 3}, + {CPM_CLK_SCC4, CPM_CLK5, 4}, + {CPM_CLK_SCC4, CPM_CLK6, 5}, + {CPM_CLK_SCC4, CPM_CLK7, 6}, + {CPM_CLK_SCC4, CPM_CLK8, 7}, + + {CPM_CLK_SMC1, CPM_BRG1, 0}, + {CPM_CLK_SMC1, CPM_BRG2, 1}, + {CPM_CLK_SMC1, CPM_BRG3, 2}, + {CPM_CLK_SMC1, CPM_BRG4, 3}, + {CPM_CLK_SMC1, CPM_CLK1, 4}, + {CPM_CLK_SMC1, CPM_CLK2, 5}, + {CPM_CLK_SMC1, CPM_CLK3, 6}, + {CPM_CLK_SMC1, CPM_CLK4, 7}, + + {CPM_CLK_SMC2, CPM_BRG1, 0}, + {CPM_CLK_SMC2, CPM_BRG2, 1}, + {CPM_CLK_SMC2, CPM_BRG3, 2}, + {CPM_CLK_SMC2, CPM_BRG4, 3}, + {CPM_CLK_SMC2, CPM_CLK5, 4}, + {CPM_CLK_SMC2, CPM_CLK6, 5}, + {CPM_CLK_SMC2, CPM_CLK7, 6}, + {CPM_CLK_SMC2, CPM_CLK8, 7}, + }; + + switch (target) { + case CPM_CLK_SCC1: + reg = &mpc8xx_immr->im_cpm.cp_sicr; + shift = 0; + break; + + case CPM_CLK_SCC2: + reg = &mpc8xx_immr->im_cpm.cp_sicr; + shift = 8; + break; + + case CPM_CLK_SCC3: + reg = &mpc8xx_immr->im_cpm.cp_sicr; + shift = 16; + break; + + case CPM_CLK_SCC4: + reg = &mpc8xx_immr->im_cpm.cp_sicr; + shift = 24; + break; + + case CPM_CLK_SMC1: + reg = &mpc8xx_immr->im_cpm.cp_simode; + shift = 12; + break; + + case CPM_CLK_SMC2: + reg = &mpc8xx_immr->im_cpm.cp_simode; + shift = 28; + break; + + default: + printk(KERN_ERR "cpm1_clock_setup: invalid clock target\n"); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(clk_map); i++) { + if (clk_map[i][0] == target && clk_map[i][1] == clock) { + bits = clk_map[i][2]; + break; + } + } + + if (i == ARRAY_SIZE(clk_map)) { + printk(KERN_ERR "cpm1_clock_setup: invalid clock combination\n"); + return -EINVAL; + } + + bits <<= shift; + mask <<= shift; + + if (reg == &mpc8xx_immr->im_cpm.cp_sicr) { + if (mode == CPM_CLK_RTX) { + bits |= bits << 3; + mask |= mask << 3; + } else if (mode == CPM_CLK_RX) { + bits <<= 3; + mask <<= 3; + } + } + + out_be32(reg, (in_be32(reg) & ~mask) | bits); + + return 0; +} + +/* + * GPIO LIB API implementation + */ +#ifdef CONFIG_8xx_GPIO + +struct cpm1_gpio16_chip { + struct of_mm_gpio_chip mm_gc; + spinlock_t lock; + + /* shadowed data register to clear/set bits safely */ + u16 cpdata; + + /* IRQ associated with Pins when relevant */ + int irq[16]; +}; + +static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc) +{ + struct cpm1_gpio16_chip *cpm1_gc = + container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + + cpm1_gc->cpdata = in_be16(&iop->dat); +} + +static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + u16 pin_mask; + + pin_mask = 1 << (15 - gpio); + + return !!(in_be16(&iop->dat) & pin_mask); +} + +static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask, + int value) +{ + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + + if (value) + cpm1_gc->cpdata |= pin_mask; + else + cpm1_gc->cpdata &= ~pin_mask; + + out_be16(&iop->dat, cpm1_gc->cpdata); +} + +static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + unsigned long flags; + u16 pin_mask = 1 << (15 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + __cpm1_gpio16_set(mm_gc, pin_mask, value); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); +} + +static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + + return cpm1_gc->irq[gpio] ? : -ENXIO; +} + +static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + unsigned long flags; + u16 pin_mask = 1 << (15 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + setbits16(&iop->dir, pin_mask); + __cpm1_gpio16_set(mm_gc, pin_mask, val); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); + + return 0; +} + +static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm_ioport16 __iomem *iop = mm_gc->regs; + unsigned long flags; + u16 pin_mask = 1 << (15 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + clrbits16(&iop->dir, pin_mask); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); + + return 0; +} + +int cpm1_gpiochip_add16(struct device *dev) +{ + struct device_node *np = dev->of_node; + struct cpm1_gpio16_chip *cpm1_gc; + struct of_mm_gpio_chip *mm_gc; + struct gpio_chip *gc; + u16 mask; + + cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + if (!cpm1_gc) + return -ENOMEM; + + spin_lock_init(&cpm1_gc->lock); + + if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) { + int i, j; + + for (i = 0, j = 0; i < 16; i++) + if (mask & (1 << (15 - i))) + cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++); + } + + mm_gc = &cpm1_gc->mm_gc; + gc = &mm_gc->gc; + + mm_gc->save_regs = cpm1_gpio16_save_regs; + gc->ngpio = 16; + gc->direction_input = cpm1_gpio16_dir_in; + gc->direction_output = cpm1_gpio16_dir_out; + gc->get = cpm1_gpio16_get; + gc->set = cpm1_gpio16_set; + gc->to_irq = cpm1_gpio16_to_irq; + gc->parent = dev; + gc->owner = THIS_MODULE; + + return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); +} + +struct cpm1_gpio32_chip { + struct of_mm_gpio_chip mm_gc; + spinlock_t lock; + + /* shadowed data register to clear/set bits safely */ + u32 cpdata; +}; + +static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) +{ + struct cpm1_gpio32_chip *cpm1_gc = + container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + + cpm1_gc->cpdata = in_be32(&iop->dat); +} + +static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + u32 pin_mask; + + pin_mask = 1 << (31 - gpio); + + return !!(in_be32(&iop->dat) & pin_mask); +} + +static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask, + int value) +{ + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + + if (value) + cpm1_gc->cpdata |= pin_mask; + else + cpm1_gc->cpdata &= ~pin_mask; + + out_be32(&iop->dat, cpm1_gc->cpdata); +} + +static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + unsigned long flags; + u32 pin_mask = 1 << (31 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + __cpm1_gpio32_set(mm_gc, pin_mask, value); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); +} + +static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + unsigned long flags; + u32 pin_mask = 1 << (31 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + setbits32(&iop->dir, pin_mask); + __cpm1_gpio32_set(mm_gc, pin_mask, val); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); + + return 0; +} + +static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) +{ + struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm_ioport32b __iomem *iop = mm_gc->regs; + unsigned long flags; + u32 pin_mask = 1 << (31 - gpio); + + spin_lock_irqsave(&cpm1_gc->lock, flags); + + clrbits32(&iop->dir, pin_mask); + + spin_unlock_irqrestore(&cpm1_gc->lock, flags); + + return 0; +} + +int cpm1_gpiochip_add32(struct device *dev) +{ + struct device_node *np = dev->of_node; + struct cpm1_gpio32_chip *cpm1_gc; + struct of_mm_gpio_chip *mm_gc; + struct gpio_chip *gc; + + cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + if (!cpm1_gc) + return -ENOMEM; + + spin_lock_init(&cpm1_gc->lock); + + mm_gc = &cpm1_gc->mm_gc; + gc = &mm_gc->gc; + + mm_gc->save_regs = cpm1_gpio32_save_regs; + gc->ngpio = 32; + gc->direction_input = cpm1_gpio32_dir_in; + gc->direction_output = cpm1_gpio32_dir_out; + gc->get = cpm1_gpio32_get; + gc->set = cpm1_gpio32_set; + gc->parent = dev; + gc->owner = THIS_MODULE; + + return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); +} + +#endif /* CONFIG_8xx_GPIO */ diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c new file mode 100644 index 0000000000..fc276a29d6 --- /dev/null +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -0,0 +1,170 @@ +/* + * Platform setup for the Embedded Planet EP88xC board + * + * Author: Scott Wood + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mpc8xx.h" +#include "pic.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin ep88xc_pins[] = { + /* SMC1 */ + {1, 24, CPM_PIN_INPUT}, /* RX */ + {1, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SCC2 */ + {0, 12, CPM_PIN_INPUT}, /* TX */ + {0, 13, CPM_PIN_INPUT}, /* RX */ + {2, 8, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CD */ + {2, 9, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CTS */ + {2, 14, CPM_PIN_INPUT}, /* RTS */ + + /* MII1 */ + {0, 0, CPM_PIN_INPUT}, + {0, 1, CPM_PIN_INPUT}, + {0, 2, CPM_PIN_INPUT}, + {0, 3, CPM_PIN_INPUT}, + {0, 4, CPM_PIN_OUTPUT}, + {0, 10, CPM_PIN_OUTPUT}, + {0, 11, CPM_PIN_OUTPUT}, + {1, 19, CPM_PIN_INPUT}, + {1, 31, CPM_PIN_INPUT}, + {2, 12, CPM_PIN_INPUT}, + {2, 13, CPM_PIN_INPUT}, + {3, 8, CPM_PIN_INPUT}, + {4, 30, CPM_PIN_OUTPUT}, + {4, 31, CPM_PIN_OUTPUT}, + + /* MII2 */ + {4, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 16, CPM_PIN_OUTPUT}, + {4, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {4, 21, CPM_PIN_OUTPUT}, + {4, 22, CPM_PIN_OUTPUT}, + {4, 23, CPM_PIN_OUTPUT}, + {4, 24, CPM_PIN_OUTPUT}, + {4, 25, CPM_PIN_OUTPUT}, + {4, 26, CPM_PIN_OUTPUT}, + {4, 27, CPM_PIN_OUTPUT}, + {4, 28, CPM_PIN_OUTPUT}, + {4, 29, CPM_PIN_OUTPUT}, + + /* USB */ + {0, 6, CPM_PIN_INPUT}, /* CLK2 */ + {0, 14, CPM_PIN_INPUT}, /* USBOE */ + {0, 15, CPM_PIN_INPUT}, /* USBRXD */ + {2, 6, CPM_PIN_OUTPUT}, /* USBTXN */ + {2, 7, CPM_PIN_OUTPUT}, /* USBTXP */ + {2, 10, CPM_PIN_INPUT}, /* USBRXN */ + {2, 11, CPM_PIN_INPUT}, /* USBRXP */ + + /* Misc */ + {1, 26, CPM_PIN_INPUT}, /* BRGO2 */ + {1, 27, CPM_PIN_INPUT}, /* BRGO1 */ +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ep88xc_pins); i++) { + struct cpm_pin *pin = &ep88xc_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_TX); /* USB */ + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX); + cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX); + cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX); +} + +static u8 __iomem *ep88xc_bcsr; + +#define BCSR7_SCC2_ENABLE 0x10 + +#define BCSR8_PHY1_ENABLE 0x80 +#define BCSR8_PHY1_POWER 0x40 +#define BCSR8_PHY2_ENABLE 0x20 +#define BCSR8_PHY2_POWER 0x10 + +#define BCSR9_USB_ENABLE 0x80 +#define BCSR9_USB_POWER 0x40 +#define BCSR9_USB_HOST 0x20 +#define BCSR9_USB_FULL_SPEED_TARGET 0x10 + +static void __init ep88xc_setup_arch(void) +{ + struct device_node *np; + + cpm_reset(); + init_ioports(); + + np = of_find_compatible_node(NULL, NULL, "fsl,ep88xc-bcsr"); + if (!np) { + printk(KERN_CRIT "Could not find fsl,ep88xc-bcsr node\n"); + return; + } + + ep88xc_bcsr = of_iomap(np, 0); + of_node_put(np); + + if (!ep88xc_bcsr) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + + setbits8(&ep88xc_bcsr[7], BCSR7_SCC2_ENABLE); + setbits8(&ep88xc_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER | + BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + /* Publish the QE devices */ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(ep88xc, declare_of_platform_devices); + +define_machine(ep88xc) { + .name = "Embedded Planet EP88xC", + .compatible = "fsl,ep88xc", + .setup_arch = ep88xc_setup_arch, + .init_IRQ = mpc8xx_pic_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c new file mode 100644 index 0000000000..2336b687bc --- /dev/null +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified for MBX using prep/chrp/pmac functions by Dan (dmalek@jlc.net) + * Further modified for generic 8xx by Dan. + */ + +/* + * bootup setup stuff.. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "pic.h" + +#include "mpc8xx.h" + +/* A place holder for time base interrupts, if they are ever enabled. */ +static irqreturn_t timebase_interrupt(int irq, void *dev) +{ + printk ("timebase_interrupt()\n"); + + return IRQ_HANDLED; +} + +static int __init get_freq(char *name, unsigned long *val) +{ + struct device_node *cpu; + const unsigned int *fp; + int found = 0; + + /* The cpu node should have timebase and clock frequency properties */ + cpu = of_get_cpu_node(0, NULL); + + if (cpu) { + fp = of_get_property(cpu, name, NULL); + if (fp) { + found = 1; + *val = *fp; + } + + of_node_put(cpu); + } + + return found; +} + +/* The decrementer counts at the system (internal) clock frequency divided by + * sixteen, or external oscillator divided by four. We force the processor + * to use system clock divided by sixteen. + */ +void __init mpc8xx_calibrate_decr(void) +{ + struct device_node *cpu; + int irq, virq; + + /* Unlock the SCCR. */ + out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY); + + /* Force all 8xx processors to use divide by 16 processor clock. */ + setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000); + + /* Processor frequency is MHz. + */ + ppc_proc_freq = 50000000; + if (!get_freq("clock-frequency", &ppc_proc_freq)) + printk(KERN_ERR "WARNING: Estimating processor frequency " + "(not found)\n"); + + ppc_tb_freq = ppc_proc_freq / 16; + printk("Decrementer Frequency = 0x%lx\n", ppc_tb_freq); + + /* Perform some more timer/timebase initialization. This used + * to be done elsewhere, but other changes caused it to get + * called more than once....that is a bad thing. + * + * First, unlock all of the registers we are going to modify. + * To protect them from corruption during power down, registers + * that are maintained by keep alive power are "locked". To + * modify these registers we have to write the key value to + * the key location associated with the register. + * Some boards power up with these unlocked, while others + * are locked. Writing anything (including the unlock code?) + * to the unlocked registers will lock them again. So, here + * we guarantee the registers are locked, then we unlock them + * for our use. + */ + out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY); + + /* Disable the RTC one second and alarm interrupts. */ + clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); + + /* Enable the RTC */ + setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); + + /* Enabling the decrementer also enables the timebase interrupts + * (or from the other point of view, to get decrementer interrupts + * we have to enable the timebase). The decrementer interrupt + * is wired into the vector table, nothing to do here for that. + */ + cpu = of_get_cpu_node(0, NULL); + virq= irq_of_parse_and_map(cpu, 0); + of_node_put(cpu); + irq = virq_to_hw(virq); + + out_be16(&mpc8xx_immr->im_sit.sit_tbscr, + ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE)); + + if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint", + NULL)) + panic("Could not allocate timer IRQ!"); +} + +/* The RTC on the MPC8xx is an internal register. + * We want to protect this during power down, so we need to unlock, + * modify, and re-lock. + */ + +int mpc8xx_set_rtc_time(struct rtc_time *tm) +{ + time64_t time; + + time = rtc_tm_to_time64(tm); + + out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY); + + return 0; +} + +void mpc8xx_get_rtc_time(struct rtc_time *tm) +{ + unsigned long data; + + /* Get time from the RTC. */ + data = in_be32(&mpc8xx_immr->im_sit.sit_rtc); + rtc_time64_to_tm(data, tm); + return; +} + +void __noreturn mpc8xx_restart(char *cmd) +{ + local_irq_disable(); + + setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080); + /* Clear the ME bit in MSR to cause checkstop on machine check + */ + mtmsr(mfmsr() & ~0x1000); + + in_8(&mpc8xx_immr->im_clkrst.res[0]); + panic("Restart failed\n"); +} diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c new file mode 100644 index 0000000000..6563659758 --- /dev/null +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + */ + +#include +#include +#include + +#include + +int machine_check_8xx(struct pt_regs *regs) +{ + unsigned long reason = regs->msr; + + pr_err("Machine check in kernel mode.\n"); + pr_err("Caused by (from SRR1=%lx): ", reason); + if (reason & 0x40000000) + pr_cont("Fetch error at address %lx\n", regs->nip); + else + pr_cont("Data access error at address %lx\n", regs->dar); + +#ifdef CONFIG_PCI + /* the qspan pci read routines can cause machine checks -- Cort + * + * yuck !!! that totally needs to go away ! There are better ways + * to deal with that than having a wart in the mcheck handler. + * -- BenH + */ + bad_page_fault(regs, SIGBUS); + return 1; +#else + return 0; +#endif +} diff --git a/arch/powerpc/platforms/8xx/mpc86xads.h b/arch/powerpc/platforms/8xx/mpc86xads.h new file mode 100644 index 0000000000..17b1fe75e0 --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc86xads.h @@ -0,0 +1,47 @@ +/* + * A collection of structures, addresses, and values associated with + * the Freescale MPC86xADS board. + * Copied from the FADS stuff. + * + * Author: MontaVista Software, Inc. + * source@mvista.com + * + * 2005 (c) MontaVista Software, Inc. This file is licensed under the + * terms of the GNU General Public License version 2. This program is licensed + * "as is" without any warranty of any kind, whether express or implied. + */ + +#ifdef __KERNEL__ +#ifndef __ASM_MPC86XADS_H__ +#define __ASM_MPC86XADS_H__ + +/* Bits of interest in the BCSRs. + */ +#define BCSR1_ETHEN ((uint)0x20000000) +#define BCSR1_IRDAEN ((uint)0x10000000) +#define BCSR1_RS232EN_1 ((uint)0x01000000) +#define BCSR1_PCCEN ((uint)0x00800000) +#define BCSR1_PCCVCC0 ((uint)0x00400000) +#define BCSR1_PCCVPP0 ((uint)0x00200000) +#define BCSR1_PCCVPP1 ((uint)0x00100000) +#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1) +#define BCSR1_RS232EN_2 ((uint)0x00040000) +#define BCSR1_PCCVCC1 ((uint)0x00010000) +#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1) + +#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/ +#define BCSR4_USB_LO_SPD ((uint)0x04000000) +#define BCSR4_USB_VCC ((uint)0x02000000) +#define BCSR4_USB_FULL_SPD ((uint)0x00040000) +#define BCSR4_USB_EN ((uint)0x00020000) + +#define BCSR5_MII2_EN 0x40 +#define BCSR5_MII2_RST 0x20 +#define BCSR5_T1_RST 0x10 +#define BCSR5_ATM155_RST 0x08 +#define BCSR5_ATM25_RST 0x04 +#define BCSR5_MII1_EN 0x02 +#define BCSR5_MII1_RST 0x01 + +#endif /* __ASM_MPC86XADS_H__ */ +#endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c new file mode 100644 index 0000000000..e4192c0a3c --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -0,0 +1,145 @@ +/*arch/powerpc/platforms/8xx/mpc86xads_setup.c + * + * Platform setup for the Freescale mpc86xads board + * + * Vitaly Bordug + * + * Copyright 2005 MontaVista Software Inc. + * + * Heavily modified by Scott Wood + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mpc86xads.h" +#include "mpc8xx.h" +#include "pic.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin mpc866ads_pins[] = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SMC2 */ + {CPM_PORTB, 21, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SCC1 */ + {CPM_PORTA, 6, CPM_PIN_INPUT}, /* CLK1 */ + {CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */ + {CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */ + {CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 19, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */ + {CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */ + {CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */ + + /* MII */ + {CPM_PORTD, 3, CPM_PIN_OUTPUT}, + {CPM_PORTD, 4, CPM_PIN_OUTPUT}, + {CPM_PORTD, 5, CPM_PIN_OUTPUT}, + {CPM_PORTD, 6, CPM_PIN_OUTPUT}, + {CPM_PORTD, 7, CPM_PIN_OUTPUT}, + {CPM_PORTD, 8, CPM_PIN_OUTPUT}, + {CPM_PORTD, 9, CPM_PIN_OUTPUT}, + {CPM_PORTD, 10, CPM_PIN_OUTPUT}, + {CPM_PORTD, 11, CPM_PIN_OUTPUT}, + {CPM_PORTD, 12, CPM_PIN_OUTPUT}, + {CPM_PORTD, 13, CPM_PIN_OUTPUT}, + {CPM_PORTD, 14, CPM_PIN_OUTPUT}, + {CPM_PORTD, 15, CPM_PIN_OUTPUT}, + + /* I2C */ + {CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, + {CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mpc866ads_pins); i++) { + struct cpm_pin *pin = &mpc866ads_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK1, CPM_CLK_TX); + cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX); + + /* Set FEC1 and FEC2 to MII mode */ + clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180); +} + +static void __init mpc86xads_setup_arch(void) +{ + struct device_node *np; + u32 __iomem *bcsr_io; + + cpm_reset(); + init_ioports(); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc866ads-bcsr"); + if (!np) { + printk(KERN_CRIT "Could not find fsl,mpc866ads-bcsr node\n"); + return; + } + + bcsr_io = of_iomap(np, 0); + of_node_put(np); + + if (bcsr_io == NULL) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + + clrbits32(bcsr_io, BCSR1_RS232EN_1 | BCSR1_RS232EN_2 | BCSR1_ETHEN); + iounmap(bcsr_io); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(mpc86x_ads, declare_of_platform_devices); + +define_machine(mpc86x_ads) { + .name = "MPC86x ADS", + .compatible = "fsl,mpc866ads", + .setup_arch = mpc86xads_setup_arch, + .init_IRQ = mpc8xx_pic_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/mpc885ads.h b/arch/powerpc/platforms/8xx/mpc885ads.h new file mode 100644 index 0000000000..19412f76fa --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc885ads.h @@ -0,0 +1,49 @@ +/* + * A collection of structures, addresses, and values associated with + * the Freescale MPC885ADS board. + * Copied from the FADS stuff. + * + * Author: MontaVista Software, Inc. + * source@mvista.com + * + * 2005 (c) MontaVista Software, Inc. This file is licensed under the + * terms of the GNU General Public License version 2. This program is licensed + * "as is" without any warranty of any kind, whether express or implied. + */ + +#ifdef __KERNEL__ +#ifndef __ASM_MPC885ADS_H__ +#define __ASM_MPC885ADS_H__ + +#include + +/* Bits of interest in the BCSRs. + */ +#define BCSR1_ETHEN ((uint)0x20000000) +#define BCSR1_IRDAEN ((uint)0x10000000) +#define BCSR1_RS232EN_1 ((uint)0x01000000) +#define BCSR1_PCCEN ((uint)0x00800000) +#define BCSR1_PCCVCC0 ((uint)0x00400000) +#define BCSR1_PCCVPP0 ((uint)0x00200000) +#define BCSR1_PCCVPP1 ((uint)0x00100000) +#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1) +#define BCSR1_RS232EN_2 ((uint)0x00040000) +#define BCSR1_PCCVCC1 ((uint)0x00010000) +#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1) + +#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/ +#define BCSR4_USB_LO_SPD ((uint)0x04000000) +#define BCSR4_USB_VCC ((uint)0x02000000) +#define BCSR4_USB_FULL_SPD ((uint)0x00040000) +#define BCSR4_USB_EN ((uint)0x00020000) + +#define BCSR5_MII2_EN 0x40 +#define BCSR5_MII2_RST 0x20 +#define BCSR5_T1_RST 0x10 +#define BCSR5_ATM155_RST 0x08 +#define BCSR5_ATM25_RST 0x04 +#define BCSR5_MII1_EN 0x02 +#define BCSR5_MII1_RST 0x01 + +#endif /* __ASM_MPC885ADS_H__ */ +#endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c new file mode 100644 index 0000000000..2d899be746 --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -0,0 +1,217 @@ +/* + * Platform setup for the Freescale mpc885ads board + * + * Vitaly Bordug + * + * Copyright 2005 MontaVista Software Inc. + * + * Heavily modified by Scott Wood + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc885ads.h" +#include "mpc8xx.h" +#include "pic.h" + +static u32 __iomem *bcsr, *bcsr5; + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin mpc885ads_pins[] = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SMC2 */ +#ifndef CONFIG_MPC8xx_SECOND_ETH_FEC2 + {CPM_PORTE, 21, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTE, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ +#endif + + /* SCC3 */ + {CPM_PORTA, 9, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTA, 8, CPM_PIN_INPUT}, /* TX */ + {CPM_PORTC, 4, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */ + {CPM_PORTC, 5, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */ + {CPM_PORTE, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */ + {CPM_PORTE, 17, CPM_PIN_INPUT}, /* CLK5 */ + {CPM_PORTE, 16, CPM_PIN_INPUT}, /* CLK6 */ + + /* MII1 */ + {CPM_PORTA, 0, CPM_PIN_INPUT}, + {CPM_PORTA, 1, CPM_PIN_INPUT}, + {CPM_PORTA, 2, CPM_PIN_INPUT}, + {CPM_PORTA, 3, CPM_PIN_INPUT}, + {CPM_PORTA, 4, CPM_PIN_OUTPUT}, + {CPM_PORTA, 10, CPM_PIN_OUTPUT}, + {CPM_PORTA, 11, CPM_PIN_OUTPUT}, + {CPM_PORTB, 19, CPM_PIN_INPUT}, + {CPM_PORTB, 31, CPM_PIN_INPUT}, + {CPM_PORTC, 12, CPM_PIN_INPUT}, + {CPM_PORTC, 13, CPM_PIN_INPUT}, + {CPM_PORTE, 30, CPM_PIN_OUTPUT}, + {CPM_PORTE, 31, CPM_PIN_OUTPUT}, + + /* MII2 */ +#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 + {CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 16, CPM_PIN_OUTPUT}, + {CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY}, + {CPM_PORTE, 21, CPM_PIN_OUTPUT}, + {CPM_PORTE, 22, CPM_PIN_OUTPUT}, + {CPM_PORTE, 23, CPM_PIN_OUTPUT}, + {CPM_PORTE, 24, CPM_PIN_OUTPUT}, + {CPM_PORTE, 25, CPM_PIN_OUTPUT}, + {CPM_PORTE, 26, CPM_PIN_OUTPUT}, + {CPM_PORTE, 27, CPM_PIN_OUTPUT}, + {CPM_PORTE, 28, CPM_PIN_OUTPUT}, + {CPM_PORTE, 29, CPM_PIN_OUTPUT}, +#endif + /* I2C */ + {CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, + {CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN}, +}; + +static void __init init_ioports(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mpc885ads_pins); i++) { + struct cpm_pin *pin = &mpc885ads_pins[i]; + cpm1_set_pin(pin->port, pin->pin, pin->flags); + } + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX); + cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_TX); + cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK6, CPM_CLK_RX); + + /* Set FEC1 and FEC2 to MII mode */ + clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180); +} + +static void __init mpc885ads_setup_arch(void) +{ + struct device_node *np; + + cpm_reset(); + init_ioports(); + + np = of_find_compatible_node(NULL, NULL, "fsl,mpc885ads-bcsr"); + if (!np) { + printk(KERN_CRIT "Could not find fsl,mpc885ads-bcsr node\n"); + return; + } + + bcsr = of_iomap(np, 0); + bcsr5 = of_iomap(np, 1); + of_node_put(np); + + if (!bcsr || !bcsr5) { + printk(KERN_CRIT "Could not remap BCSR\n"); + return; + } + + clrbits32(&bcsr[1], BCSR1_RS232EN_1); +#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 + setbits32(&bcsr[1], BCSR1_RS232EN_2); +#else + clrbits32(&bcsr[1], BCSR1_RS232EN_2); +#endif + + clrbits32(bcsr5, BCSR5_MII1_EN); + setbits32(bcsr5, BCSR5_MII1_RST); + udelay(1000); + clrbits32(bcsr5, BCSR5_MII1_RST); + +#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 + clrbits32(bcsr5, BCSR5_MII2_EN); + setbits32(bcsr5, BCSR5_MII2_RST); + udelay(1000); + clrbits32(bcsr5, BCSR5_MII2_RST); +#else + setbits32(bcsr5, BCSR5_MII2_EN); +#endif + +#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3 + clrbits32(&bcsr[4], BCSR4_ETH10_RST); + udelay(1000); + setbits32(&bcsr[4], BCSR4_ETH10_RST); + + setbits32(&bcsr[1], BCSR1_ETHEN); + + np = of_find_node_by_path("/soc@ff000000/cpm@9c0/serial@a80"); +#else + np = of_find_node_by_path("/soc@ff000000/cpm@9c0/ethernet@a40"); +#endif + + /* The SCC3 enet registers overlap the SMC1 registers, so + * one of the two must be removed from the device tree. + */ + + if (np) { + of_detach_node(np); + of_node_put(np); + } +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + /* Publish the QE devices */ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(mpc885_ads, declare_of_platform_devices); + +define_machine(mpc885_ads) { + .name = "Freescale MPC885 ADS", + .compatible = "fsl,mpc885ads", + .setup_arch = mpc885ads_setup_arch, + .init_IRQ = mpc8xx_pic_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/8xx/mpc8xx.h b/arch/powerpc/platforms/8xx/mpc8xx.h new file mode 100644 index 0000000000..79fae33248 --- /dev/null +++ b/arch/powerpc/platforms/8xx/mpc8xx.h @@ -0,0 +1,20 @@ +/* + * Prototypes, etc. for the Freescale MPC8xx embedded cpu chips + * May need to be cleaned as the port goes on ... + * + * Copyright (C) 2008 Jochen Friedrich + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ +#ifndef __MPC8xx_H +#define __MPC8xx_H + +extern void __noreturn mpc8xx_restart(char *cmd); +extern void mpc8xx_calibrate_decr(void); +extern int mpc8xx_set_rtc_time(struct rtc_time *tm); +extern void mpc8xx_get_rtc_time(struct rtc_time *tm); +extern unsigned int mpc8xx_get_irq(void); + +#endif /* __MPC8xx_H */ diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c new file mode 100644 index 0000000000..ea6b0e523c --- /dev/null +++ b/arch/powerpc/platforms/8xx/pic.c @@ -0,0 +1,155 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pic.h" + + +#define PIC_VEC_SPURRIOUS 15 + +static struct irq_domain *mpc8xx_pic_host; +static unsigned long mpc8xx_cached_irq_mask; +static sysconf8xx_t __iomem *siu_reg; + +static inline unsigned long mpc8xx_irqd_to_bit(struct irq_data *d) +{ + return 0x80000000 >> irqd_to_hwirq(d); +} + +static void mpc8xx_unmask_irq(struct irq_data *d) +{ + mpc8xx_cached_irq_mask |= mpc8xx_irqd_to_bit(d); + out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask); +} + +static void mpc8xx_mask_irq(struct irq_data *d) +{ + mpc8xx_cached_irq_mask &= ~mpc8xx_irqd_to_bit(d); + out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask); +} + +static void mpc8xx_ack(struct irq_data *d) +{ + out_be32(&siu_reg->sc_sipend, mpc8xx_irqd_to_bit(d)); +} + +static void mpc8xx_end_irq(struct irq_data *d) +{ + mpc8xx_cached_irq_mask |= mpc8xx_irqd_to_bit(d); + out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask); +} + +static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type) +{ + /* only external IRQ senses are programmable */ + if ((flow_type & IRQ_TYPE_EDGE_FALLING) && !(irqd_to_hwirq(d) & 1)) { + unsigned int siel = in_be32(&siu_reg->sc_siel); + siel |= mpc8xx_irqd_to_bit(d); + out_be32(&siu_reg->sc_siel, siel); + irq_set_handler_locked(d, handle_edge_irq); + } + return 0; +} + +static struct irq_chip mpc8xx_pic = { + .name = "8XX SIU", + .irq_unmask = mpc8xx_unmask_irq, + .irq_mask = mpc8xx_mask_irq, + .irq_ack = mpc8xx_ack, + .irq_eoi = mpc8xx_end_irq, + .irq_set_type = mpc8xx_set_irq_type, +}; + +unsigned int mpc8xx_get_irq(void) +{ + int irq; + + /* For MPC8xx, read the SIVEC register and shift the bits down + * to get the irq number. + */ + irq = in_be32(&siu_reg->sc_sivec) >> 26; + + if (irq == PIC_VEC_SPURRIOUS) + return 0; + + return irq_linear_revmap(mpc8xx_pic_host, irq); + +} + +static int mpc8xx_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + pr_debug("mpc8xx_pic_host_map(%d, 0x%lx)\n", virq, hw); + + /* Set default irq handle */ + irq_set_chip_and_handler(virq, &mpc8xx_pic, handle_level_irq); + return 0; +} + + +static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + static unsigned char map_pic_senses[4] = { + IRQ_TYPE_EDGE_RISING, + IRQ_TYPE_LEVEL_LOW, + IRQ_TYPE_LEVEL_HIGH, + IRQ_TYPE_EDGE_FALLING, + }; + + if (intspec[0] > 0x1f) + return 0; + + *out_hwirq = intspec[0]; + if (intsize > 1 && intspec[1] < 4) + *out_flags = map_pic_senses[intspec[1]]; + else + *out_flags = IRQ_TYPE_NONE; + + return 0; +} + + +static const struct irq_domain_ops mpc8xx_pic_host_ops = { + .map = mpc8xx_pic_host_map, + .xlate = mpc8xx_pic_host_xlate, +}; + +void __init mpc8xx_pic_init(void) +{ + struct resource res; + struct device_node *np; + int ret; + + np = of_find_compatible_node(NULL, NULL, "fsl,pq1-pic"); + if (np == NULL) + np = of_find_node_by_type(NULL, "mpc8xx-pic"); + if (np == NULL) { + printk(KERN_ERR "Could not find fsl,pq1-pic node\n"); + return; + } + + ret = of_address_to_resource(np, 0, &res); + if (ret) + goto out; + + siu_reg = ioremap(res.start, resource_size(&res)); + if (!siu_reg) + goto out; + + mpc8xx_pic_host = irq_domain_add_linear(np, 64, &mpc8xx_pic_host_ops, NULL); + if (!mpc8xx_pic_host) + printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); + +out: + of_node_put(np); +} diff --git a/arch/powerpc/platforms/8xx/pic.h b/arch/powerpc/platforms/8xx/pic.h new file mode 100644 index 0000000000..c70f1b446f --- /dev/null +++ b/arch/powerpc/platforms/8xx/pic.h @@ -0,0 +1,19 @@ +#ifndef _PPC_KERNEL_MPC8xx_H +#define _PPC_KERNEL_MPC8xx_H + +#include +#include + +void mpc8xx_pic_init(void); +unsigned int mpc8xx_get_irq(void); + +/* + * Some internal interrupt registers use an 8-bit mask for the interrupt + * level instead of a number. + */ +static inline uint mk_int_int_mask(uint mask) +{ + return (1 << (7 - (mask/2))); +} + +#endif /* _PPC_KERNEL_PPC8xx_H */ diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c new file mode 100644 index 0000000000..d97a7910c5 --- /dev/null +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -0,0 +1,148 @@ +/* + * Platform setup for the MPC8xx based boards from TQM. + * + * Heiko Schocher + * Copyright 2010 DENX Software Engineering GmbH + * + * based on: + * Vitaly Bordug + * + * Copyright 2005 MontaVista Software Inc. + * + * Heavily modified by Scott Wood + * Copyright 2007 Freescale Semiconductor, Inc. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc8xx.h" +#include "pic.h" + +struct cpm_pin { + int port, pin, flags; +}; + +static struct cpm_pin tqm8xx_pins[] __initdata = { + /* SMC1 */ + {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ + + /* SCC1 */ + {CPM_PORTA, 5, CPM_PIN_INPUT}, /* CLK1 */ + {CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */ + {CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */ + {CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */ + {CPM_PORTC, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */ + {CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, + {CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, +}; + +static struct cpm_pin tqm8xx_fec_pins[] __initdata = { + /* MII */ + {CPM_PORTD, 3, CPM_PIN_OUTPUT}, + {CPM_PORTD, 4, CPM_PIN_OUTPUT}, + {CPM_PORTD, 5, CPM_PIN_OUTPUT}, + {CPM_PORTD, 6, CPM_PIN_OUTPUT}, + {CPM_PORTD, 7, CPM_PIN_OUTPUT}, + {CPM_PORTD, 8, CPM_PIN_OUTPUT}, + {CPM_PORTD, 9, CPM_PIN_OUTPUT}, + {CPM_PORTD, 10, CPM_PIN_OUTPUT}, + {CPM_PORTD, 11, CPM_PIN_OUTPUT}, + {CPM_PORTD, 12, CPM_PIN_OUTPUT}, + {CPM_PORTD, 13, CPM_PIN_OUTPUT}, + {CPM_PORTD, 14, CPM_PIN_OUTPUT}, + {CPM_PORTD, 15, CPM_PIN_OUTPUT}, +}; + +static void __init init_pins(int n, struct cpm_pin *pin) +{ + int i; + + for (i = 0; i < n; i++) { + cpm1_set_pin(pin->port, pin->pin, pin->flags); + pin++; + } +} + +static void __init init_ioports(void) +{ + struct device_node *dnode; + struct property *prop; + int len; + + init_pins(ARRAY_SIZE(tqm8xx_pins), &tqm8xx_pins[0]); + + cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX); + + dnode = of_find_node_by_name(NULL, "aliases"); + if (dnode == NULL) + return; + prop = of_find_property(dnode, "ethernet1", &len); + + of_node_put(dnode); + + if (prop == NULL) + return; + + /* init FEC pins */ + init_pins(ARRAY_SIZE(tqm8xx_fec_pins), &tqm8xx_fec_pins[0]); +} + +static void __init tqm8xx_setup_arch(void) +{ + cpm_reset(); + init_ioports(); +} + +static const struct of_device_id of_bus_ids[] __initconst = { + { .name = "soc", }, + { .name = "cpm", }, + { .name = "localbus", }, + { .compatible = "simple-bus" }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + + return 0; +} +machine_device_initcall(tqm8xx, declare_of_platform_devices); + +define_machine(tqm8xx) { + .name = "TQM8xx", + .compatible = "tqc,tqm8xx", + .setup_arch = tqm8xx_setup_arch, + .init_IRQ = mpc8xx_pic_init, + .get_irq = mpc8xx_get_irq, + .restart = mpc8xx_restart, + .calibrate_decr = mpc8xx_calibrate_decr, + .set_rtc_time = mpc8xx_set_rtc_time, + .get_rtc_time = mpc8xx_get_rtc_time, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig new file mode 100644 index 0000000000..1fd253f92a --- /dev/null +++ b/arch/powerpc/platforms/Kconfig @@ -0,0 +1,307 @@ +# SPDX-License-Identifier: GPL-2.0 +menu "Platform support" + +source "arch/powerpc/platforms/powernv/Kconfig" +source "arch/powerpc/platforms/pseries/Kconfig" +source "arch/powerpc/platforms/chrp/Kconfig" +source "arch/powerpc/platforms/512x/Kconfig" +source "arch/powerpc/platforms/52xx/Kconfig" +source "arch/powerpc/platforms/powermac/Kconfig" +source "arch/powerpc/platforms/maple/Kconfig" +source "arch/powerpc/platforms/pasemi/Kconfig" +source "arch/powerpc/platforms/ps3/Kconfig" +source "arch/powerpc/platforms/cell/Kconfig" +source "arch/powerpc/platforms/8xx/Kconfig" +source "arch/powerpc/platforms/82xx/Kconfig" +source "arch/powerpc/platforms/83xx/Kconfig" +source "arch/powerpc/platforms/85xx/Kconfig" +source "arch/powerpc/platforms/86xx/Kconfig" +source "arch/powerpc/platforms/embedded6xx/Kconfig" +source "arch/powerpc/platforms/44x/Kconfig" +source "arch/powerpc/platforms/40x/Kconfig" +source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/book3s/Kconfig" +source "arch/powerpc/platforms/microwatt/Kconfig" + +config KVM_GUEST + bool "KVM Guest support" + select EPAPR_PARAVIRT + help + This option enables various optimizations for running under the KVM + hypervisor. Overhead for the kernel when not running inside KVM should + be minimal. + + In case of doubt, say Y + +config EPAPR_PARAVIRT + bool "ePAPR para-virtualization support" + help + Enables ePAPR para-virtualization support for guests. + + In case of doubt, say Y + +config PPC_HASH_MMU_NATIVE + bool + depends on PPC_BOOK3S + help + Support for running natively on the hardware, i.e. without + a hypervisor. This option is not user-selectable but should + be selected by all platforms that need it. + +config PPC_OF_BOOT_TRAMPOLINE + bool "Support booting from Open Firmware or yaboot" + depends on PPC_BOOK3S_32 || PPC64 + select RELOCATABLE if PPC64 + default y + help + Support from booting from Open Firmware or yaboot using an + Open Firmware client interface. This enables the kernel to + communicate with open firmware to retrieve system information + such as the device tree. + + In case of doubt, say Y + +config PPC_DT_CPU_FTRS + bool "Device-tree based CPU feature discovery & setup" + depends on PPC_BOOK3S_64 + default y + help + This enables code to use a new device tree binding for describing CPU + compatibility and features. Saying Y here will attempt to use the new + binding if the firmware provides it. Currently only the skiboot + firmware provides this binding. + If you're not sure say Y. + +config UDBG_RTAS_CONSOLE + bool "RTAS based debug console" + depends on PPC_RTAS + +config PPC_SMP_MUXED_IPI + bool + help + Select this option if your platform supports SMP and your + interrupt controller provides less than 4 interrupts to each + cpu. This will enable the generic code to multiplex the 4 + messages on to one ipi. + +config IPIC + bool + +config MPIC + bool + +config MPIC_TIMER + bool "MPIC Global Timer" + depends on MPIC && FSL_SOC + help + The MPIC global timer is a hardware timer inside the + Freescale PIC complying with OpenPIC standard. When the + specified interval times out, the hardware timer generates + an interrupt. The driver currently is only tested on fsl + chip, but it can potentially support other global timers + complying with the OpenPIC standard. + +config FSL_MPIC_TIMER_WAKEUP + tristate "Freescale MPIC global timer wakeup driver" + depends on FSL_SOC && MPIC_TIMER && PM + help + The driver provides a way to wake up the system by MPIC + timer. + e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup" + +config PPC_EPAPR_HV_PIC + bool + select EPAPR_PARAVIRT + +config MPIC_WEIRD + bool + +config MPIC_MSGR + bool "MPIC message register support" + depends on MPIC + help + Enables support for the MPIC message registers. These + registers are used for inter-processor communication. + +config PPC_I8259 + bool + +config U3_DART + bool + depends on PPC64 + +config PPC_RTAS + bool + +config RTAS_ERROR_LOGGING + bool + depends on PPC_RTAS + +config PPC_RTAS_DAEMON + bool + depends on PPC_RTAS + +config RTAS_PROC + bool "Proc interface to RTAS" + depends on PPC_RTAS && PROC_FS + default y + +config RTAS_FLASH + tristate "Firmware flash interface" + depends on PPC64 && RTAS_PROC + +config MMIO_NVRAM + bool + +config MPIC_U3_HT_IRQS + bool + +config MPIC_BROKEN_REGREAD + bool + depends on MPIC + help + This option enables a MPIC driver workaround for some chips + that have a bug that causes some interrupt source information + to not read back properly. It is safe to use on other chips as + well, but enabling it uses about 8KB of memory to keep copies + of the register contents in software. + +config EEH + bool + depends on (PPC_POWERNV || PPC_PSERIES) && PCI + default y + +config PPC_MPC106 + bool + +config PPC_970_NAP + bool + +config PPC_P7_NAP + bool + +config PPC_BOOK3S_IDLE + def_bool y + depends on (PPC_970_NAP || PPC_P7_NAP) + +config PPC_INDIRECT_PIO + bool + select GENERIC_IOMAP + +config PPC_INDIRECT_MMIO + bool + +config PPC_IO_WORKAROUNDS + bool + +source "drivers/cpufreq/Kconfig" + +menu "CPUIdle driver" + +source "drivers/cpuidle/Kconfig" + +endmenu + +config TAU + bool "On-chip CPU temperature sensor support" + depends on PPC_BOOK3S_32 + help + G3 and G4 processors have an on-chip temperature sensor called the + 'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die + temperature within 2-4 degrees Celsius. This option shows the current + on-die temperature in /proc/cpuinfo if the cpu supports it. + + Unfortunately, this sensor is very inaccurate when uncalibrated, so + don't assume the cpu temp is actually what /proc/cpuinfo says it is. + +config TAU_INT + bool "Interrupt driven TAU driver (EXPERIMENTAL)" + depends on TAU + help + The TAU supports an interrupt driven mode which causes an interrupt + whenever the temperature goes out of range. This is the fastest way + to get notified the temp has exceeded a range. With this option off, + a timer is used to re-check the temperature periodically. + + If in doubt, say N here. + +config TAU_AVERAGE + bool "Average high and low temp" + depends on TAU + help + The TAU hardware can compare the temperature to an upper and lower + bound. The default behavior is to show both the upper and lower + bound in /proc/cpuinfo. If the range is large, the temperature is + either changing a lot, or the TAU hardware is broken (likely on some + G4's). If the range is small (around 4 degrees), the temperature is + relatively stable. If you say Y here, a single temperature value, + halfway between the upper and lower bounds, will be reported in + /proc/cpuinfo. + + If in doubt, say N here. + +config QE_GPIO + bool "QE GPIO support" + depends on QUICC_ENGINE + select GPIOLIB + select OF_GPIO_MM_GPIOCHIP + help + Say Y here if you're going to use hardware that connects to the + QE GPIOs. + +config CPM2 + bool "Enable support for the CPM2 (Communications Processor Module)" + depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx + select CPM + select HAVE_PCI + select GPIOLIB + select OF_GPIO_MM_GPIOCHIP + help + The CPM2 (Communications Processor Module) is a coprocessor on + embedded CPUs made by Freescale. Selecting this option means that + you wish to build a kernel for a machine with a CPM2 coprocessor + on it (826x, 827x, 8560). + +config FSL_ULI1575 + bool "ULI1575 PCIe south bridge support" + depends on FSL_SOC_BOOKE || PPC_86xx + depends on PCI + select FSL_PCI + select GENERIC_ISA_DMA + help + Supports for the ULI1575 PCIe south bridge that exists on some + Freescale reference boards. The boards all use the ULI in pretty + much the same way. + +config CPM + bool + select GENERIC_ALLOCATOR + +config OF_RTC + bool + help + Uses information from the OF or flattened device tree to instantiate + platform devices for direct mapped RTC chips like the DS1742 or DS1743. + +config GEN_RTC + bool "Use the platform RTC operations from user space" + select RTC_CLASS + select RTC_DRV_GENERIC + help + This option provides backwards compatibility with the old gen_rtc.ko + module that was traditionally used for old PowerPC machines. + Platforms should migrate to enabling the RTC_DRV_GENERIC by hand + replacing their get_rtc_time/set_rtc_time callbacks with + a proper RTC device driver. + +config MCU_MPC8349EMITX + bool "MPC8349E-mITX MCU driver" + depends on I2C=y && PPC_83xx + select GPIOLIB + help + Say Y here to enable soft power-off functionality on the Freescale + boards with the MPC8349E-mITX-compatible MCU chips. This driver will + also register MCU GPIOs with the generic GPIO API, so you'll able + to use MCU pins as GPIOs. + +endmenu diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype new file mode 100644 index 0000000000..b2d8c0da2a --- /dev/null +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -0,0 +1,646 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC32 + bool + default y if !PPC64 + +config PPC64 + bool "64-bit kernel" + select ZLIB_DEFLATE + help + This option selects whether a 32-bit or a 64-bit kernel + will be built. + +menu "Processor support" +choice + prompt "Processor Type" + depends on PPC32 + help + There are five families of 32 bit PowerPC chips supported. + The most common ones are the desktop and server CPUs (603, + 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their + embedded 512x/52xx/82xx/83xx/86xx counterparts. + The other embedded parts, namely 4xx, 8xx and e500 + (85xx) each form a family of their own that is not compatible + with the others. + + If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. + +config PPC_BOOK3S_32 + bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" + imply PPC_FPU + select PPC_HAVE_PMU_SUPPORT + select HAVE_ARCH_VMAP_STACK + +config PPC_85xx + bool "Freescale 85xx" + select PPC_E500 + +config PPC_8xx + bool "Freescale 8xx" + select ARCH_SUPPORTS_HUGETLBFS + select FSL_SOC + select PPC_KUEP + select HAVE_ARCH_VMAP_STACK + select HUGETLBFS + +config 40x + bool "AMCC 40x" + select PPC_DCR_NATIVE + select PPC_UDBG_16550 + select 4xx_SOC + select HAVE_PCI + select PPC_KUEP if PPC_KUAP + +config 44x + bool "AMCC 44x, 46x or 47x" + select PPC_DCR_NATIVE + select PPC_UDBG_16550 + select 4xx_SOC + select HAVE_PCI + select PHYS_64BIT + select PPC_KUEP + +endchoice + +config PPC_BOOK3S_603 + bool "Support for 603 SW loaded TLB" + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors based on the 603 cores. Those + processors don't have a HASH MMU and provide SW TLB loading. + +config PPC_BOOK3S_604 + bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603 + depends on PPC_BOOK3S_32 + default y + help + Provide support for processors not based on the 603 cores. + Those processors have a HASH MMU. + +choice + prompt "Processor Type" + depends on PPC64 + help + There are two families of 64 bit PowerPC chips supported. + The most common ones are the desktop and server CPUs + (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...) + + The other are the "embedded" processors compliant with the + "Book 3E" variant of the architecture + +config PPC_BOOK3S_64 + bool "Server processors" + select PPC_FPU + select PPC_HAVE_PMU_SUPPORT + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION + select ARCH_ENABLE_SPLIT_PMD_PTLOCK + select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE + select ARCH_SUPPORTS_HUGETLBFS + select ARCH_SUPPORTS_NUMA_BALANCING + select HAVE_MOVE_PMD + select HAVE_MOVE_PUD + select IRQ_WORK + select PPC_64S_HASH_MMU if !PPC_RADIX_MMU + select KASAN_VMALLOC if KASAN + +config PPC_BOOK3E_64 + bool "Embedded processors" + select PPC_E500 + select PPC_E500MC + select PPC_FPU # Make it a choice ? + select PPC_SMP_MUXED_IPI + select PPC_DOORBELL + select ZONE_DMA + +endchoice + +choice + prompt "CPU selection" + help + This will create a kernel which is optimised for a particular CPU. + The resulting kernel may not run on other CPUs, so use this with care. + + If unsure, select Generic. + +config POWERPC64_CPU + bool "Generic (POWER5 and PowerPC 970 and above)" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU + +config POWERPC64_CPU + bool "Generic (POWER8 and above)" + depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN + select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX + +config POWERPC_CPU + bool "Generic 32 bits powerpc" + depends on PPC_BOOK3S_32 + +config CELL_CPU + bool "Cell Broadband Engine" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + depends on !CC_IS_CLANG + select PPC_64S_HASH_MMU + +config PPC_970_CPU + bool "PowerPC 970 (including PowerPC G5)" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU + +config POWER6_CPU + bool "POWER6" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU + +config POWER7_CPU + bool "POWER7" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX + +config POWER8_CPU + bool "POWER8" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU + select PPC_HAS_LBARX_LHARX + +config POWER9_CPU + bool "POWER9" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + select PPC_HAS_LBARX_LHARX + +config POWER10_CPU + bool "POWER10" + depends on PPC_BOOK3S_64 + select ARCH_HAS_FAST_MULTIPLIER + select PPC_HAVE_PREFIXED_SUPPORT + select PPC_HAVE_PCREL_SUPPORT + +config E5500_CPU + bool "Freescale e5500" + depends on PPC64 && PPC_E500 + +config E6500_CPU + bool "Freescale e6500" + depends on PPC64 && PPC_E500 + depends on !CC_IS_CLANG + select PPC_HAS_LBARX_LHARX + +config 405_CPU + bool "40x family" + depends on 40x + depends on !CC_IS_CLANG + +config 440_CPU + bool "440 (44x family)" + depends on 44x + +config 464_CPU + bool "464 (44x family)" + depends on 44x + depends on !CC_IS_CLANG + +config 476_CPU + bool "476 (47x family)" + depends on PPC_47x + depends on !CC_IS_CLANG + +config 860_CPU + bool "8xx family" + depends on PPC_8xx + depends on !CC_IS_CLANG + +config E300C2_CPU + bool "e300c2 (832x)" + depends on PPC_BOOK3S_32 + depends on !CC_IS_CLANG + +config E300C3_CPU + bool "e300c3 (831x)" + depends on PPC_BOOK3S_32 + depends on !CC_IS_CLANG + +config G4_CPU + bool "G4 (74xx)" + depends on PPC_BOOK3S_32 + select ALTIVEC + +config E500_CPU + bool "e500 (8540)" + depends on PPC_85xx && !PPC_E500MC + +config E500MC_CPU + bool "e500mc" + depends on PPC_85xx && PPC_E500MC + +config TOOLCHAIN_DEFAULT_CPU + bool "Rely on the toolchain's implicit default CPU" + +endchoice + +config TARGET_CPU_BOOL + bool + default !TOOLCHAIN_DEFAULT_CPU + +config TARGET_CPU + string + depends on TARGET_CPU_BOOL + default "cell" if CELL_CPU + default "970" if PPC_970_CPU + default "power6" if POWER6_CPU + default "power7" if POWER7_CPU + default "power8" if POWER8_CPU + default "power9" if POWER9_CPU + default "power10" if POWER10_CPU + default "e5500" if E5500_CPU + default "e6500" if E6500_CPU + default "power4" if POWERPC64_CPU && !CPU_LITTLE_ENDIAN + default "power8" if POWERPC64_CPU && CPU_LITTLE_ENDIAN + default "405" if 405_CPU + default "440" if 440_CPU + default "464" if 464_CPU + default "476" if 476_CPU + default "860" if 860_CPU + default "e300c2" if E300C2_CPU + default "e300c3" if E300C3_CPU + default "G4" if G4_CPU + default "8540" if E500_CPU + default "e500mc" if E500MC_CPU + default "powerpc" if POWERPC_CPU + +config TUNE_CPU + string + depends on POWERPC64_CPU + default "-mtune=power10" if $(cc-option,-mtune=power10) + default "-mtune=power9" if $(cc-option,-mtune=power9) + default "-mtune=power8" if $(cc-option,-mtune=power8) + +config PPC_BOOK3S + def_bool y + depends on PPC_BOOK3S_32 || PPC_BOOK3S_64 + +config PPC_E500 + select FSL_EMB_PERFMON + bool + select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64 + select PPC_SMP_MUXED_IPI + select PPC_DOORBELL + select PPC_KUEP + +config PPC_E500MC + bool "e500mc Support" + select PPC_FPU + select COMMON_CLK + depends on PPC_E500 + help + This must be enabled for running on e500mc (and derivatives + such as e5500/e6500), and must be disabled for running on + e500v1 or e500v2. + +config PPC_FPU_REGS + bool + +config PPC_FPU + bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x + default y if PPC64 + select PPC_FPU_REGS + help + This must be enabled to support the Floating Point Unit + Most 6xx have an FPU but e300c2 core (mpc832x) don't have + an FPU, so when building an embedded kernel for that target + you can disable FPU support. + + If unsure say Y. + +config FSL_EMB_PERFMON + bool "Freescale Embedded Perfmon" + depends on PPC_E500 || PPC_83xx + help + This is the Performance Monitor support found on the e500 core + and some e300 cores (c3 and c4). Select this only if your + core supports the Embedded Performance Monitor APU + +config FSL_EMB_PERF_EVENT + bool + depends on FSL_EMB_PERFMON && PERF_EVENTS && !PPC_PERF_CTRS + default y + +config FSL_EMB_PERF_EVENT_E500 + bool + depends on FSL_EMB_PERF_EVENT && PPC_E500 + default y + +config 4xx + bool + depends on 40x || 44x + default y + +config BOOKE + bool + depends on PPC_E500 || 44x + default y + +config BOOKE_OR_40x + bool + depends on BOOKE || 40x + default y + +config PTE_64BIT + bool + depends on 44x || PPC_E500 || PPC_86xx + default y if PHYS_64BIT + +config PHYS_64BIT + bool 'Large physical address support' if PPC_E500 || PPC_86xx + depends on (44x || PPC_E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx + select PHYS_ADDR_T_64BIT + help + This option enables kernel support for larger than 32-bit physical + addresses. This feature may not be available on all cores. + + If you have more than 3.5GB of RAM or so, you also need to enable + SWIOTLB under Kernel Options for this to work. The actual number + is platform-dependent. + + If in doubt, say N here. + +config ALTIVEC + bool "AltiVec Support" + depends on PPC_BOOK3S || (PPC_E500MC && PPC64 && !E5500_CPU) + select PPC_FPU + help + This option enables kernel support for the Altivec extensions to the + PowerPC processor. The kernel currently supports saving and restoring + altivec registers, and turning on the 'altivec enable' bit so user + processes can execute altivec instructions. + + This option is only usefully if you have a processor that supports + altivec (G4, otherwise known as 74xx series), but does not have + any affect on a non-altivec cpu (it does, however add code to the + kernel). + + If in doubt, say Y here. + +config VSX + bool "VSX Support" + depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU + help + + This option enables kernel support for the Vector Scaler extensions + to the PowerPC processor. The kernel currently supports saving and + restoring VSX registers, and turning on the 'VSX enable' bit so user + processes can execute VSX instructions. + + This option is only useful if you have a processor that supports + VSX (P7 and above), but does not have any affect on a non-VSX + CPUs (it does, however add code to the kernel). + + If in doubt, say Y here. + +config SPE_POSSIBLE + def_bool y + depends on PPC_E500 && !PPC_E500MC + +config SPE + bool "SPE Support" + depends on SPE_POSSIBLE + default y + help + This option enables kernel support for the Signal Processing + Extensions (SPE) to the PowerPC processor. The kernel currently + supports saving and restoring SPE registers, and turning on the + 'spe enable' bit so user processes can execute SPE instructions. + + This option is only useful if you have a processor that supports + SPE (e500, otherwise known as 85xx series), but does not have any + effect on a non-spe cpu (it does, however add code to the kernel). + + If in doubt, say Y here. + +config PPC_64S_HASH_MMU + bool "Hash MMU Support" + depends on PPC_BOOK3S_64 + default y + help + Enable support for the Power ISA Hash style MMU. This is implemented + by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The + OpenPOWER ISA does not mandate the hash MMU and some CPUs do not + implement it (e.g., Microwatt). + + Note that POWER9 PowerVM platforms only support the hash + MMU. From POWER10 radix is also supported by PowerVM. + + If you're unsure, say Y. + +config PPC_RADIX_MMU + bool "Radix MMU Support" + depends on PPC_BOOK3S_64 + select ARCH_HAS_GIGANTIC_PAGE + default y + help + Enable support for the Power ISA 3.0 Radix style MMU. Currently this + is only implemented by IBM Power9 CPUs, if you don't have one of them + you can probably disable this. + +config PPC_RADIX_MMU_DEFAULT + bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU + depends on PPC_BOOK3S_64 + depends on PPC_RADIX_MMU + default y + help + When the hardware supports the Radix MMU, default to using it unless + "disable_radix[=yes]" is specified on the kernel command line. + + If this option is disabled, the Hash MMU will be used by default, + unless "disable_radix=no" is specified on the kernel command line. + + If you're unsure, say Y. + +config PPC_KERNEL_PREFIXED + depends on PPC_HAVE_PREFIXED_SUPPORT + depends on CC_HAS_PREFIXED + default n + bool "Build Kernel with Prefixed Instructions" + help + POWER10 and later CPUs support prefixed instructions, 8 byte + instructions that include large immediate, pc relative addressing, + and various floating point, vector, MMA. + + This option builds the kernel with prefixed instructions, and + allows a pc relative addressing option to be selected. + + Kernel support for prefixed instructions in applications and guests + is not affected by this option. + +config PPC_KERNEL_PCREL + depends on PPC_HAVE_PCREL_SUPPORT + depends on PPC_HAVE_PREFIXED_SUPPORT + depends on CC_HAS_PCREL + default n + select PPC_KERNEL_PREFIXED + bool "Build Kernel with PC-Relative addressing model" + help + POWER10 and later CPUs support pc relative addressing. Recent + compilers have support for an ELF ABI extension for a pc relative + ABI. + + This option builds the kernel with the pc relative ABI model. + +config PPC_KUEP + bool "Kernel Userspace Execution Prevention" if !40x + default y if !40x + help + Enable support for Kernel Userspace Execution Prevention (KUEP) + + If you're unsure, say Y. + +config PPC_KUAP + bool "Kernel Userspace Access Protection" + default y + help + Enable support for Kernel Userspace Access Protection (KUAP) + + If you're unsure, say Y. + +config PPC_KUAP_DEBUG + bool "Extra debugging for Kernel Userspace Access Protection" + depends on PPC_KUAP + help + Add extra debugging for Kernel Userspace Access Protection (KUAP) + If you're unsure, say N. + +config PPC_PKEY + def_bool y + depends on PPC_BOOK3S_64 + depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP + + +config PPC_MMU_NOHASH + def_bool y + depends on !PPC_BOOK3S + +config PPC_HAVE_PMU_SUPPORT + bool + +config PPC_HAVE_PREFIXED_SUPPORT + bool + +config PPC_HAVE_PCREL_SUPPORT + bool + +config PMU_SYSFS + bool "Create PMU SPRs sysfs file" + default n + help + This option enables sysfs file creation for PMU SPRs like MMCR* and PMC*. + +config PPC_PERF_CTRS + def_bool y + depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT + help + This enables the powerpc-specific perf_event back-end. + +config FORCE_SMP + # Allow platforms to force SMP=y by selecting this + bool + select SMP + +config SMP + depends on PPC_BOOK3S || PPC_E500 || PPC_47x + select GENERIC_IRQ_MIGRATION + bool "Symmetric multi-processing support" if !FORCE_SMP + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. Note that the kernel does not currently + support SMP machines with 603/603e/603ev or PPC750 ("G3") processors + since they have inadequate hardware support for multiprocessor + operation. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on single-processor machines. + On a single-processor machine, the kernel will run faster if you say + N here. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-8192)" if SMP + range 2 8192 if SMP + default "1" if !SMP + default "32" if PPC64 + default "4" + +config NOT_COHERENT_CACHE + bool + depends on 4xx || PPC_8xx || PPC_MPC512x || \ + GAMECUBE_COMMON || AMIGAONE + select ARCH_HAS_DMA_PREP_COHERENT + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU + select DMA_DIRECT_REMAP + default n if PPC_47x + default y + +config CHECK_CACHE_COHERENCY + bool + +config PPC_DOORBELL + bool + +endmenu + +config VDSO32 + def_bool y + depends on PPC32 || COMPAT + help + This symbol controls whether we build the 32-bit VDSO. We obviously + want to do that if we're building a 32-bit kernel. If we're building + a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling + COMPAT. + +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + This option selects whether a big endian or little endian kernel will + be built. + +config CPU_BIG_ENDIAN + bool "Build big endian kernel" + help + Build a big endian kernel. + + If unsure, select this option. + +config CPU_LITTLE_ENDIAN + bool "Build little endian kernel" + depends on PPC_BOOK3S_64 + select PPC64_BOOT_WRAPPER + help + Build a little endian kernel. + + Note that if cross compiling a little endian kernel, + CROSS_COMPILE must point to a toolchain capable of targeting + little endian powerpc. + +endchoice + +config PPC64_ELF_ABI_V1 + def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2) + +config PPC64_ELF_ABI_V2 + def_bool PPC64 && !PPC64_ELF_ABI_V1 + +config PPC64_BOOT_WRAPPER + def_bool n + depends on CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile new file mode 100644 index 0000000000..94470fb27c --- /dev/null +++ b/arch/powerpc/platforms/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o + +obj-$(CONFIG_PPC_PMAC) += powermac/ +obj-$(CONFIG_PPC_CHRP) += chrp/ +obj-$(CONFIG_4xx) += 4xx/ +obj-$(CONFIG_40x) += 40x/ +obj-$(CONFIG_44x) += 44x/ +obj-$(CONFIG_PPC_MPC512x) += 512x/ +obj-$(CONFIG_PPC_MPC52xx) += 52xx/ +obj-$(CONFIG_PPC_8xx) += 8xx/ +obj-$(CONFIG_PPC_82xx) += 82xx/ +obj-$(CONFIG_PPC_83xx) += 83xx/ +obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/ +obj-$(CONFIG_PPC_86xx) += 86xx/ +obj-$(CONFIG_PPC_POWERNV) += powernv/ +obj-$(CONFIG_PPC_PSERIES) += pseries/ +obj-$(CONFIG_PPC_MAPLE) += maple/ +obj-$(CONFIG_PPC_PASEMI) += pasemi/ +obj-$(CONFIG_PPC_CELL) += cell/ +obj-$(CONFIG_PPC_PS3) += ps3/ +obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ +obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_BOOK3S) += book3s/ +obj-$(CONFIG_PPC_MICROWATT) += microwatt/ diff --git a/arch/powerpc/platforms/amigaone/Kconfig b/arch/powerpc/platforms/amigaone/Kconfig new file mode 100644 index 0000000000..0741edb10b --- /dev/null +++ b/arch/powerpc/platforms/amigaone/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +config AMIGAONE + bool "Eyetech AmigaOne/MAI Teron" + depends on PPC_BOOK3S_32 && BROKEN_ON_SMP + select PPC_I8259 + select PPC_INDIRECT_PCI + select PPC_UDBG_16550 + select FORCE_PCI + select NOT_COHERENT_CACHE + select CHECK_CACHE_COHERENCY + select DEFAULT_UIMAGE + select HAVE_PCSPKR_PLATFORM + help + Select AmigaOne for the following machines: + - AmigaOne SE/Teron CX (G3 only) + - AmigaOne XE/Teron PX + - uA1/Teron mini + More information is available at: + . diff --git a/arch/powerpc/platforms/amigaone/Makefile b/arch/powerpc/platforms/amigaone/Makefile new file mode 100644 index 0000000000..e95e4e3e2d --- /dev/null +++ b/arch/powerpc/platforms/amigaone/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += setup.o diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c new file mode 100644 index 0000000000..6c6e714a75 --- /dev/null +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AmigaOne platform setup + * + * Copyright 2008 Gerhard Pircher (gerhard_pircher@gmx.net) + * + * Based on original amigaone_setup.c source code + * Copyright 2003 by Hans-Joerg Frieden and Thomas Frieden + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +extern void __flush_disable_L1(void); + +void amigaone_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Eyetech Ltd.\n"); +} + +static int __init amigaone_add_bridge(struct device_node *dev) +{ + const u32 *cfg_addr, *cfg_data; + int len; + const int *bus_range; + struct pci_controller *hose; + + printk(KERN_INFO "Adding PCI host bridge %pOF\n", dev); + + cfg_addr = of_get_address(dev, 0, NULL, NULL); + cfg_data = of_get_address(dev, 1, NULL, NULL); + if ((cfg_addr == NULL) || (cfg_data == NULL)) + return -ENODEV; + + bus_range = of_get_property(dev, "bus-range", &len); + if ((bus_range == NULL) || (len < 2 * sizeof(int))) + printk(KERN_WARNING "Can't get bus-range for %pOF, assume" + " bus 0\n", dev); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + setup_indirect_pci(hose, cfg_addr[0], cfg_data[0], 0); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); + + return 0; +} + +void __init amigaone_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0); +} + +static void __init amigaone_discover_phbs(void) +{ + struct device_node *np; + int phb = -ENODEV; + + /* Lookup PCI host bridges. */ + for_each_compatible_node(np, "pci", "mai-logic,articia-s") + phb = amigaone_add_bridge(np); + + BUG_ON(phb != 0); +} + +void __init amigaone_init_IRQ(void) +{ + struct device_node *pic, *np = NULL; + const unsigned long *prop = NULL; + unsigned long int_ack = 0; + + /* Search for ISA interrupt controller. */ + pic = of_find_compatible_node(NULL, "interrupt-controller", + "pnpPNP,000"); + BUG_ON(pic == NULL); + + /* Look for interrupt acknowledge address in the PCI root node. */ + np = of_find_compatible_node(NULL, "pci", "mai-logic,articia-s"); + if (np) { + prop = of_get_property(np, "8259-interrupt-acknowledge", NULL); + if (prop) + int_ack = prop[0]; + of_node_put(np); + } + + if (int_ack == 0) + printk(KERN_WARNING "Cannot find PCI interrupt acknowledge" + " address, polling\n"); + + i8259_init(pic, int_ack); + ppc_md.get_irq = i8259_irq; + irq_set_default_host(i8259_get_host()); +} + +static int __init request_isa_regions(void) +{ + request_region(0x00, 0x20, "dma1"); + request_region(0x40, 0x20, "timer"); + request_region(0x80, 0x10, "dma page reg"); + request_region(0xc0, 0x20, "dma2"); + + return 0; +} +machine_device_initcall(amigaone, request_isa_regions); + +void __noreturn amigaone_restart(char *cmd) +{ + local_irq_disable(); + + /* Flush and disable caches. */ + __flush_disable_L1(); + + /* Set SRR0 to the reset vector and turn on MSR_IP. */ + mtspr(SPRN_SRR0, 0xfff00100); + mtspr(SPRN_SRR1, MSR_IP); + + /* Do an rfi to jump back to firmware. */ + __asm__ __volatile__("rfi" : : : "memory"); + + /* Not reached. */ + while (1); +} + +static int __init amigaone_probe(void) +{ + /* + * Coherent memory access cause complete system lockup! Thus + * disable this CPU feature, even if the CPU needs it. + */ + cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT; + + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + return 1; +} + +define_machine(amigaone) { + .name = "AmigaOne", + .compatible = "eyetech,amigaone", + .probe = amigaone_probe, + .setup_arch = amigaone_setup_arch, + .discover_phbs = amigaone_discover_phbs, + .show_cpuinfo = amigaone_show_cpuinfo, + .init_IRQ = amigaone_init_IRQ, + .restart = amigaone_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig new file mode 100644 index 0000000000..34c931592e --- /dev/null +++ b/arch/powerpc/platforms/book3s/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_VAS + bool "IBM Virtual Accelerator Switchboard (VAS)" + depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES + default y + help + This enables support for IBM Virtual Accelerator Switchboard (VAS). + + VAS devices are found in POWER9-based and later systems, they + provide access to accelerator coprocessors such as NX-GZIP and + NX-842. This config allows the kernel to use NX-842 accelerators, + and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV + and POWER10 PowerVM platforms. + + If unsure, say "N". diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile new file mode 100644 index 0000000000..e790f1910f --- /dev/null +++ b/arch/powerpc/platforms/book3s/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PPC_VAS) += vas-api.o diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c new file mode 100644 index 0000000000..f381b177ea --- /dev/null +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -0,0 +1,634 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#define pr_fmt(fmt) "vas-api: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documentation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. + */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; + const struct vas_user_win_ops *vops; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(const struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +/* + * Take reference to pid and mm + */ +int get_vas_user_win_ref(struct vas_user_win_ref *task_ref) +{ + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is free by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + task_ref->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + task_ref->mm = get_task_mm(current); + if (!task_ref->mm) { + put_pid(task_ref->pid); + pr_err("pid(%d): mm_struct is not found\n", + current->pid); + return -EPERM; + } + + mmgrab(task_ref->mm); + mmput(task_ref->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. So takes tgid + * reference until window closed to make sure tgid is not + * reused. + */ + task_ref->tgid = find_get_pid(task_tgid_vnr(current)); + + return 0; +} + +/* + * Successful return must release the task reference with + * put_task_struct + */ +static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref, + struct task_struct **tskp, struct pid **pidp) +{ + struct task_struct *tsk; + struct pid *pid; + + pid = task_ref->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + if (!tsk) { + pid = task_ref->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return false; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return false; + } + + *tskp = tsk; + *pidp = pid; + + return true; +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!task_ref->mm)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + /* + * Process closes send window after all pending NX requests are + * completed. In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + + if (!ref_get_pid_and_task(task_ref, &tsk, &pid)) + return; + + kthread_use_mm(task_ref->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + kthread_unuse_mm(task_ref->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc); +} + +void vas_dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("copy_from_user() returns %d\n", rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid window open API version\n"); + return -EINVAL; + } + + if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) { + pr_err("VAS API is not registered\n"); + return -EACCES; + } + + txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags, + cp_inst->coproc->cop_type); + if (IS_ERR(txwin)) { + pr_err_ratelimited("VAS window open failed rc=%ld\n", + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + mutex_init(&txwin->task_ref.mmap_mutex); + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + int rc; + + if (cp_inst->txwin) { + if (cp_inst->coproc->vops && + cp_inst->coproc->vops->close_win) { + rc = cp_inst->coproc->vops->close_win(cp_inst->txwin); + if (rc) + return rc; + } + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). + */ + + return 0; +} + +/* + * If the executed instruction that caused the fault was a paste, then + * clear regs CR0[EQ], advance NIP, and return 0. Else return error code. + */ +static int do_fail_paste(void) +{ + struct pt_regs *regs = current->thread.regs; + u32 instword; + + if (WARN_ON_ONCE(!regs)) + return -EINVAL; + + if (WARN_ON_ONCE(!user_mode(regs))) + return -EINVAL; + + /* + * If we couldn't translate the instruction, the driver should + * return success without handling the fault, it will be retried + * or the instruction fetch will fault. + */ + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EAGAIN; + + /* + * Not a paste instruction, driver may fail the fault. + */ + if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE) + return -ENOENT; + + regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */ + regs_add_return_ip(regs, 4); /* Emulate the paste */ + + return 0; +} + +/* + * This fault handler is invoked when the core generates page fault on + * the paste address. Happens if the kernel closes window in hypervisor + * (on pseries) due to lost credit or the paste address is not mapped. + */ +static vm_fault_t vas_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *fp = vma->vm_file; + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + vm_fault_t fault; + u64 paste_addr; + int ret; + + /* + * window is not opened. Shouldn't expect this error. + */ + if (!cp_inst || !cp_inst->txwin) { + pr_err("Unexpected fault on paste address with TX window closed\n"); + return VM_FAULT_SIGBUS; + } + + txwin = cp_inst->txwin; + /* + * When the LPAR lost credits due to core removal or during + * migration, invalidate the existing mapping for the current + * paste addresses and set windows in-active (zap_vma_pages in + * reconfig_close_windows()). + * New mapping will be done later after migration or new credits + * available. So continue to receive faults if the user space + * issue NX request. + */ + if (txwin->task_ref.vma != vmf->vma) { + pr_err("No previous mapping with paste address\n"); + return VM_FAULT_SIGBUS; + } + + mutex_lock(&txwin->task_ref.mmap_mutex); + /* + * The window may be inactive due to lost credit (Ex: core + * removal with DLPAR). If the window is active again when + * the credit is available, map the new paste address at the + * window virtual address. + */ + if (txwin->status == VAS_WIN_ACTIVE) { + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (paste_addr) { + fault = vmf_insert_pfn(vma, vma->vm_start, + (paste_addr >> PAGE_SHIFT)); + mutex_unlock(&txwin->task_ref.mmap_mutex); + return fault; + } + } + mutex_unlock(&txwin->task_ref.mmap_mutex); + + /* + * Received this fault due to closing the actual window. + * It can happen during migration or lost credits. + * Since no mapping, return the paste instruction failure + * to the user space. + */ + ret = do_fail_paste(); + /* + * The user space can retry several times until success (needed + * for migration) or should fallback to SW compression or + * manage with the existing open windows if available. + * Looking at sysfs interface, it can determine whether these + * failures are coming during migration or core removal: + * nr_used_credits > nr_total_credits when lost credits + */ + if (!ret || (ret == -EAGAIN)) + return VM_FAULT_NOPAGE; + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct vas_vm_ops = { + .fault = vas_mmap_fault, +}; + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n", + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("No send window open?\n"); + return -EINVAL; + } + + if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) { + pr_err("VAS API is not registered\n"); + return -EACCES; + } + + /* + * The initial mmap is done after the window is opened + * with ioctl. But before mmap(), this window can be closed in + * the hypervisor due to lost credit (core removal on pseries). + * So if the window is not active, return mmap() failure with + * -EACCES and expects the user space reissue mmap() when it + * is active again or open new window when the credit is available. + * mmap_mutex protects the paste address mmap() with DLPAR + * close/open event and allows mmap() only when the window is + * active. + */ + mutex_lock(&txwin->task_ref.mmap_mutex); + if (txwin->status != VAS_WIN_ACTIVE) { + pr_err("Window is not active\n"); + rc = -EACCES; + goto out; + } + + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (!paste_addr) { + pr_err("Window paste address failed\n"); + rc = -EINVAL; + goto out; + } + + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr, + vma->vm_start, rc); + + txwin->task_ref.vma = vma; + vma->vm_ops = &vas_vm_ops; + +out: + mutex_unlock(&txwin->task_ref.mmap_mutex); + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name, + const struct vas_user_win_ops *vops) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + coproc_device.vops = vops; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig new file mode 100644 index 0000000000..34669b060f --- /dev/null +++ b/arch/powerpc/platforms/cell/Kconfig @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_CELL + select PPC_64S_HASH_MMU if PPC64 + bool + +config PPC_CELL_COMMON + bool + select PPC_CELL + select PPC_DCR_MMIO + select PPC_INDIRECT_PIO + select PPC_INDIRECT_MMIO + select PPC_HASH_MMU_NATIVE + select PPC_RTAS + select IRQ_EDGE_EOI_HANDLER + +config PPC_CELL_NATIVE + bool + select PPC_CELL_COMMON + select MPIC + select PPC_IO_WORKAROUNDS + select IBM_EMAC_EMAC4 if IBM_EMAC + select IBM_EMAC_RGMII if IBM_EMAC + select IBM_EMAC_ZMII if IBM_EMAC #test only + select IBM_EMAC_TAH if IBM_EMAC #test only + +config PPC_IBM_CELL_BLADE + bool "IBM Cell Blade" + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN + select PPC_CELL_NATIVE + select PPC_OF_PLATFORM_PCI + select FORCE_PCI + select MMIO_NVRAM + select PPC_UDBG_16550 + select UDBG_RTAS_CONSOLE + +config AXON_MSI + bool + depends on PPC_IBM_CELL_BLADE && PCI_MSI + select IRQ_DOMAIN_NOMAP + default y + +menu "Cell Broadband Engine options" + depends on PPC_CELL + +config SPU_FS + tristate "SPU file system" + default m + depends on PPC_CELL + depends on COREDUMP + select SPU_BASE + help + The SPU file system is used to access Synergistic Processing + Units on machines implementing the Broadband Processor + Architecture. + +config SPU_BASE + bool + select PPC_COPRO_BASE + +config CBE_RAS + bool "RAS features for bare metal Cell BE" + depends on PPC_CELL_NATIVE + default y + +config PPC_IBM_CELL_RESETBUTTON + bool "IBM Cell Blade Pinhole reset button" + depends on CBE_RAS && PPC_IBM_CELL_BLADE + default y + help + Support Pinhole Resetbutton on IBM Cell blades. + This adds a method to trigger system reset via front panel pinhole button. + +config PPC_IBM_CELL_POWERBUTTON + tristate "IBM Cell Blade power button" + depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV + default y + help + Support Powerbutton on IBM Cell blades. + This will enable the powerbutton as an input device. + +config CBE_THERM + tristate "CBE thermal support" + default m + depends on CBE_RAS && SPU_BASE + +config PPC_PMI + tristate + default y + depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON + help + PMI (Platform Management Interrupt) is a way to + communicate with the BMC (Baseboard Management Controller). + It is used in some IBM Cell blades. + +config CBE_CPUFREQ_SPU_GOVERNOR + tristate "CBE frequency scaling based on SPU usage" + depends on SPU_FS && CPU_FREQ + default m + help + This governor checks for spu usage to adjust the cpu frequency. + If no spu is running on a given cpu, that cpu will be throttled to + the minimal possible frequency. + +endmenu diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile new file mode 100644 index 0000000000..7ea6692f67 --- /dev/null +++ b/arch/powerpc/platforms/cell/Makefile @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o + +obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \ + pmu.o spider-pci.o +obj-$(CONFIG_CBE_RAS) += ras.o + +obj-$(CONFIG_CBE_THERM) += cbe_thermal.o +obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o + +obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o + +ifdef CONFIG_SMP +obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o +endif + +# needed only when building loadable spufs.ko +spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o +spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o + +obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ + spu_syscalls.o \ + $(spu-priv1-y) \ + $(spu-manage-y) \ + spufs/ + +obj-$(CONFIG_AXON_MSI) += axon_msi.o diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c new file mode 100644 index 0000000000..28dc86744c --- /dev/null +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2007, Michael Ellerman, IBM Corporation. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "cell.h" + +/* + * MSIC registers, specified as offsets from dcr_base + */ +#define MSIC_CTRL_REG 0x0 + +/* Base Address registers specify FIFO location in BE memory */ +#define MSIC_BASE_ADDR_HI_REG 0x3 +#define MSIC_BASE_ADDR_LO_REG 0x4 + +/* Hold the read/write offsets into the FIFO */ +#define MSIC_READ_OFFSET_REG 0x5 +#define MSIC_WRITE_OFFSET_REG 0x6 + + +/* MSIC control register flags */ +#define MSIC_CTRL_ENABLE 0x0001 +#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002 +#define MSIC_CTRL_IRQ_ENABLE 0x0008 +#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010 + +/* + * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB. + * Currently we're using a 64KB FIFO size. + */ +#define MSIC_FIFO_SIZE_SHIFT 16 +#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT) + +/* + * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits + * 8-9 of the MSIC control reg. + */ +#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300) + +/* + * We need to mask the read/write offsets to make sure they stay within + * the bounds of the FIFO. Also they should always be 16-byte aligned. + */ +#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu) + +/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */ +#define MSIC_FIFO_ENTRY_SIZE 0x10 + + +struct axon_msic { + struct irq_domain *irq_domain; + __le32 *fifo_virt; + dma_addr_t fifo_phys; + dcr_host_t dcr_host; + u32 read_offset; +#ifdef DEBUG + u32 __iomem *trigger; +#endif +}; + +#ifdef DEBUG +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic); +#else +static inline void axon_msi_debug_setup(struct device_node *dn, + struct axon_msic *msic) { } +#endif + + +static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) +{ + pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); + + dcr_write(msic->dcr_host, dcr_n, val); +} + +static void axon_msi_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct axon_msic *msic = irq_desc_get_handler_data(desc); + u32 write_offset, msi; + int idx; + int retry = 0; + + write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); + pr_devel("axon_msi: original write_offset 0x%x\n", write_offset); + + /* write_offset doesn't wrap properly, so we have to mask it */ + write_offset &= MSIC_FIFO_SIZE_MASK; + + while (msic->read_offset != write_offset && retry < 100) { + idx = msic->read_offset / sizeof(__le32); + msi = le32_to_cpu(msic->fifo_virt[idx]); + msi &= 0xFFFF; + + pr_devel("axon_msi: woff %x roff %x msi %x\n", + write_offset, msic->read_offset, msi); + + if (msi < nr_irqs && irq_get_chip_data(msi) == msic) { + generic_handle_irq(msi); + msic->fifo_virt[idx] = cpu_to_le32(0xffffffff); + } else { + /* + * Reading the MSIC_WRITE_OFFSET_REG does not + * reliably flush the outstanding DMA to the + * FIFO buffer. Here we were reading stale + * data, so we need to retry. + */ + udelay(1); + retry++; + pr_devel("axon_msi: invalid irq 0x%x!\n", msi); + continue; + } + + if (retry) { + pr_devel("axon_msi: late irq 0x%x, retry %d\n", + msi, retry); + retry = 0; + } + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + if (retry) { + printk(KERN_WARNING "axon_msi: irq timed out\n"); + + msic->read_offset += MSIC_FIFO_ENTRY_SIZE; + msic->read_offset &= MSIC_FIFO_SIZE_MASK; + } + + chip->irq_eoi(&desc->irq_data); +} + +static struct axon_msic *find_msi_translator(struct pci_dev *dev) +{ + struct irq_domain *irq_domain; + struct device_node *dn, *tmp; + const phandle *ph; + struct axon_msic *msic = NULL; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return NULL; + } + + for (; dn; dn = of_get_next_parent(dn)) { + ph = of_get_property(dn, "msi-translator", NULL); + if (ph) + break; + } + + if (!ph) { + dev_dbg(&dev->dev, + "axon_msi: no msi-translator property found\n"); + goto out_error; + } + + tmp = dn; + dn = of_find_node_by_phandle(*ph); + of_node_put(tmp); + if (!dn) { + dev_dbg(&dev->dev, + "axon_msi: msi-translator doesn't point to a node\n"); + goto out_error; + } + + irq_domain = irq_find_host(dn); + if (!irq_domain) { + dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %pOF\n", + dn); + goto out_error; + } + + msic = irq_domain->host_data; + +out_error: + of_node_put(dn); + + return msic; +} + +static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) +{ + struct device_node *dn; + int len; + const u32 *prop; + + dn = of_node_get(pci_device_to_OF_node(dev)); + if (!dn) { + dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n"); + return -ENODEV; + } + + for (; dn; dn = of_get_next_parent(dn)) { + if (!dev->no_64bit_msi) { + prop = of_get_property(dn, "msi-address-64", &len); + if (prop) + break; + } + + prop = of_get_property(dn, "msi-address-32", &len); + if (prop) + break; + } + + if (!prop) { + dev_dbg(&dev->dev, + "axon_msi: no msi-address-(32|64) properties found\n"); + of_node_put(dn); + return -ENOENT; + } + + switch (len) { + case 8: + msg->address_hi = prop[0]; + msg->address_lo = prop[1]; + break; + case 4: + msg->address_hi = 0; + msg->address_lo = prop[0]; + break; + default: + dev_dbg(&dev->dev, + "axon_msi: malformed msi-address-(32|64) property\n"); + of_node_put(dn); + return -EINVAL; + } + + of_node_put(dn); + + return 0; +} + +static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int virq, rc; + struct msi_desc *entry; + struct msi_msg msg; + struct axon_msic *msic; + + msic = find_msi_translator(dev); + if (!msic) + return -ENODEV; + + rc = setup_msi_msg_address(dev, &msg); + if (rc) + return rc; + + msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) { + virq = irq_create_direct_mapping(msic->irq_domain); + if (!virq) { + dev_warn(&dev->dev, + "axon_msi: virq allocation failed!\n"); + return -1; + } + dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq); + + irq_set_msi_desc(virq, entry); + msg.data = virq; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) +{ + struct msi_desc *entry; + + dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); + + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) { + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + entry->irq = 0; + } +} + +static struct irq_chip msic_irq_chip = { + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_shutdown = pci_msi_mask_irq, + .name = "AXON-MSI", +}; + +static int msic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq); + + return 0; +} + +static const struct irq_domain_ops msic_host_ops = { + .map = msic_host_map, +}; + +static void axon_msi_shutdown(struct platform_device *device) +{ + struct axon_msic *msic = dev_get_drvdata(&device->dev); + u32 tmp; + + pr_devel("axon_msi: disabling %pOF\n", + irq_domain_get_of_node(msic->irq_domain)); + tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); + tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; + msic_dcr_write(msic, MSIC_CTRL_REG, tmp); +} + +static int axon_msi_probe(struct platform_device *device) +{ + struct device_node *dn = device->dev.of_node; + struct axon_msic *msic; + unsigned int virq; + int dcr_base, dcr_len; + + pr_devel("axon_msi: setting up dn %pOF\n", dn); + + msic = kzalloc(sizeof(*msic), GFP_KERNEL); + if (!msic) { + printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n", + dn); + goto out; + } + + dcr_base = dcr_resource_start(dn, 0); + dcr_len = dcr_resource_len(dn, 0); + + if (dcr_base == 0 || dcr_len == 0) { + printk(KERN_ERR + "axon_msi: couldn't parse dcr properties on %pOF\n", + dn); + goto out_free_msic; + } + + msic->dcr_host = dcr_map(dn, dcr_base, dcr_len); + if (!DCR_MAP_OK(msic->dcr_host)) { + printk(KERN_ERR "axon_msi: dcr_map failed for %pOF\n", + dn); + goto out_free_msic; + } + + msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, + &msic->fifo_phys, GFP_KERNEL); + if (!msic->fifo_virt) { + printk(KERN_ERR "axon_msi: couldn't allocate fifo for %pOF\n", + dn); + goto out_free_msic; + } + + virq = irq_of_parse_and_map(dn, 0); + if (!virq) { + printk(KERN_ERR "axon_msi: irq parse and map failed for %pOF\n", + dn); + goto out_free_fifo; + } + memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES); + + /* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */ + msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic); + if (!msic->irq_domain) { + printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %pOF\n", + dn); + goto out_free_fifo; + } + + irq_set_handler_data(virq, msic); + irq_set_chained_handler(virq, axon_msi_cascade); + pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq); + + /* Enable the MSIC hardware */ + msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32); + msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG, + msic->fifo_phys & 0xFFFFFFFF); + msic_dcr_write(msic, MSIC_CTRL_REG, + MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE | + MSIC_CTRL_FIFO_SIZE); + + msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG) + & MSIC_FIFO_SIZE_MASK; + + dev_set_drvdata(&device->dev, msic); + + cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs; + cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs; + + axon_msi_debug_setup(dn, msic); + + printk(KERN_DEBUG "axon_msi: setup MSIC on %pOF\n", dn); + + return 0; + +out_free_fifo: + dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt, + msic->fifo_phys); +out_free_msic: + kfree(msic); +out: + + return -1; +} + +static const struct of_device_id axon_msi_device_id[] = { + { + .compatible = "ibm,axon-msic" + }, + {} +}; + +static struct platform_driver axon_msi_driver = { + .probe = axon_msi_probe, + .shutdown = axon_msi_shutdown, + .driver = { + .name = "axon-msi", + .of_match_table = axon_msi_device_id, + }, +}; + +static int __init axon_msi_init(void) +{ + return platform_driver_register(&axon_msi_driver); +} +subsys_initcall(axon_msi_init); + + +#ifdef DEBUG +static int msic_set(void *data, u64 val) +{ + struct axon_msic *msic = data; + out_le32(msic->trigger, val); + return 0; +} + +static int msic_get(void *data, u64 *val) +{ + *val = 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n"); + +void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic) +{ + char name[8]; + struct resource res; + + if (of_address_to_resource(dn, 0, &res)) { + pr_devel("axon_msi: couldn't get reg property\n"); + return; + } + + msic->trigger = ioremap(res.start, 0x4); + if (!msic->trigger) { + pr_devel("axon_msi: ioremap failed\n"); + return; + } + + snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn)); + + debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic); +} +#endif /* DEBUG */ diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c new file mode 100644 index 0000000000..a3ee397486 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * driver for powerbutton on IBM cell blades + * + * (C) Copyright IBM Corp. 2005-2008 + * + * Author: Christian Krafft + */ + +#include +#include +#include +#include +#include + +static struct input_dev *button_dev; +static struct platform_device *button_pdev; + +static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg) +{ + BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON); + + input_report_key(button_dev, KEY_POWER, 1); + input_sync(button_dev); + input_report_key(button_dev, KEY_POWER, 0); + input_sync(button_dev); +} + +static struct pmi_handler cbe_pmi_handler = { + .type = PMI_TYPE_POWER_BUTTON, + .handle_pmi_message = cbe_powerbutton_handle_pmi, +}; + +static int __init cbe_powerbutton_init(void) +{ + int ret = 0; + struct input_dev *dev; + + if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) { + printk(KERN_ERR "%s: Not a cell blade.\n", __func__); + ret = -ENODEV; + goto out; + } + + dev = input_allocate_device(); + if (!dev) { + ret = -ENOMEM; + printk(KERN_ERR "%s: Not enough memory.\n", __func__); + goto out; + } + + set_bit(EV_KEY, dev->evbit); + set_bit(KEY_POWER, dev->keybit); + + dev->name = "Power Button"; + dev->id.bustype = BUS_HOST; + + /* this makes the button look like an acpi power button + * no clue whether anyone relies on that though */ + dev->id.product = 0x02; + dev->phys = "LNXPWRBN/button/input0"; + + button_pdev = platform_device_register_simple("power_button", 0, NULL, 0); + if (IS_ERR(button_pdev)) { + ret = PTR_ERR(button_pdev); + goto out_free_input; + } + + dev->dev.parent = &button_pdev->dev; + ret = input_register_device(dev); + if (ret) { + printk(KERN_ERR "%s: Failed to register device\n", __func__); + goto out_free_pdev; + } + + button_dev = dev; + + ret = pmi_register_handler(&cbe_pmi_handler); + if (ret) { + printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__); + goto out_free_pdev; + } + + goto out; + +out_free_pdev: + platform_device_unregister(button_pdev); +out_free_input: + input_free_device(dev); +out: + return ret; +} + +static void __exit cbe_powerbutton_exit(void) +{ + pmi_unregister_handler(&cbe_pmi_handler); + platform_device_unregister(button_pdev); + input_free_device(button_dev); +} + +module_init(cbe_powerbutton_init); +module_exit(cbe_powerbutton_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft "); diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c new file mode 100644 index 0000000000..99b3558753 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cbe_regs.c + * + * Accessor routines for the various MMIO register blocks of the CBE + * + * (c) 2006 Benjamin Herrenschmidt , IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Current implementation uses "cpu" nodes. We build our own mapping + * array of cpu numbers to cpu nodes locally for now to allow interrupt + * time code to have a fast path rather than call of_get_cpu_node(). If + * we implement cpu hotplug, we'll have to install an appropriate notifier + * in order to release references to the cpu going away + */ +static struct cbe_regs_map +{ + struct device_node *cpu_node; + struct device_node *be_node; + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_iic_regs __iomem *iic_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + struct cbe_pmd_shadow_regs pmd_shadow_regs; +} cbe_regs_maps[MAX_CBE]; +static int cbe_regs_map_count; + +static struct cbe_thread_map +{ + struct device_node *cpu_node; + struct device_node *be_node; + struct cbe_regs_map *regs; + unsigned int thread_id; + unsigned int cbe_id; +} cbe_thread_map[NR_CPUS]; + +static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} }; +static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE }; + +static struct cbe_regs_map *cbe_find_map(struct device_node *np) +{ + int i; + struct device_node *tmp_np; + + if (!of_node_is_type(np, "spe")) { + for (i = 0; i < cbe_regs_map_count; i++) + if (cbe_regs_maps[i].cpu_node == np || + cbe_regs_maps[i].be_node == np) + return &cbe_regs_maps[i]; + return NULL; + } + + if (np->data) + return np->data; + + /* walk up path until cpu or be node was found */ + tmp_np = np; + do { + tmp_np = tmp_np->parent; + /* on a correct devicetree we wont get up to root */ + BUG_ON(!tmp_np); + } while (!of_node_is_type(tmp_np, "cpu") || + !of_node_is_type(tmp_np, "be")); + + np->data = cbe_find_map(tmp_np); + + return np->data; +} + +struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->pmd_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_pmd_regs); + +struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->pmd_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs); + +struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->iic_regs; +} + +struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->iic_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs); + +u32 cbe_get_hw_thread_id(int cpu) +{ + return cbe_thread_map[cpu].thread_id; +} +EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id); + +u32 cbe_cpu_to_node(int cpu) +{ + return cbe_thread_map[cpu].cbe_id; +} +EXPORT_SYMBOL_GPL(cbe_cpu_to_node); + +u32 cbe_node_to_cpu(int node) +{ + return cpumask_first(&cbe_local_mask[node]); + +} +EXPORT_SYMBOL_GPL(cbe_node_to_cpu); + +static struct device_node *__init cbe_get_be_node(int cpu_id) +{ + struct device_node *np; + + for_each_node_by_type (np, "be") { + int len,i; + const phandle *cpu_handle; + + cpu_handle = of_get_property(np, "cpus", &len); + + /* + * the CAB SLOF tree is non compliant, so we just assume + * there is only one node + */ + if (WARN_ON_ONCE(!cpu_handle)) + return np; + + for (i = 0; i < len; i++) { + struct device_node *ch_np = of_find_node_by_phandle(cpu_handle[i]); + struct device_node *ci_np = of_get_cpu_node(cpu_id, NULL); + + of_node_put(ch_np); + of_node_put(ci_np); + + if (ch_np == ci_np) + return np; + } + } + + return NULL; +} + +static void __init cbe_fill_regs_map(struct cbe_regs_map *map) +{ + if(map->be_node) { + struct device_node *be, *np, *parent_np; + + be = map->be_node; + + for_each_node_by_type(np, "pervasive") { + parent_np = of_get_parent(np); + if (parent_np == be) + map->pmd_regs = of_iomap(np, 0); + of_node_put(parent_np); + } + + for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") { + parent_np = of_get_parent(np); + if (parent_np == be) + map->iic_regs = of_iomap(np, 2); + of_node_put(parent_np); + } + + for_each_node_by_type(np, "mic-tm") { + parent_np = of_get_parent(np); + if (parent_np == be) + map->mic_tm_regs = of_iomap(np, 0); + of_node_put(parent_np); + } + } else { + struct device_node *cpu; + /* That hack must die die die ! */ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + cpu = map->cpu_node; + + prop = of_get_property(cpu, "pervasive", NULL); + if (prop != NULL) + map->pmd_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "iic", NULL); + if (prop != NULL) + map->iic_regs = ioremap(prop->address, prop->len); + + prop = of_get_property(cpu, "mic-tm", NULL); + if (prop != NULL) + map->mic_tm_regs = ioremap(prop->address, prop->len); + } +} + + +void __init cbe_regs_init(void) +{ + int i; + unsigned int thread_id; + struct device_node *cpu; + + /* Build local fast map of CPUs */ + for_each_possible_cpu(i) { + cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id); + cbe_thread_map[i].be_node = cbe_get_be_node(i); + cbe_thread_map[i].thread_id = thread_id; + } + + /* Find maps for each device tree CPU */ + for_each_node_by_type(cpu, "cpu") { + struct cbe_regs_map *map; + unsigned int cbe_id; + + cbe_id = cbe_regs_map_count++; + map = &cbe_regs_maps[cbe_id]; + + if (cbe_regs_map_count > MAX_CBE) { + printk(KERN_ERR "cbe_regs: More BE chips than supported" + "!\n"); + cbe_regs_map_count--; + of_node_put(cpu); + return; + } + of_node_put(map->cpu_node); + map->cpu_node = of_node_get(cpu); + + for_each_possible_cpu(i) { + struct cbe_thread_map *thread = &cbe_thread_map[i]; + + if (thread->cpu_node == cpu) { + thread->regs = map; + thread->cbe_id = cbe_id; + map->be_node = thread->be_node; + cpumask_set_cpu(i, &cbe_local_mask[cbe_id]); + if(thread->thread_id == 0) + cpumask_set_cpu(i, &cbe_first_online_cpu); + } + } + + cbe_fill_regs_map(map); + } +} + diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c new file mode 100644 index 0000000000..2f45428e32 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -0,0 +1,386 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * thermal support for the cell processor + * + * This module adds some sysfs attributes to cpu and spu nodes. + * Base for measurements are the digital thermal sensors (DTS) + * located on the chip. + * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius + * The attributes can be found under + * /sys/devices/system/cpu/cpuX/thermal + * /sys/devices/system/spu/spuX/thermal + * + * The following attributes are added for each node: + * temperature: + * contains the current temperature measured by the DTS + * throttle_begin: + * throttling begins when temperature is greater or equal to + * throttle_begin. Setting this value to 125 prevents throttling. + * throttle_end: + * throttling is being ceased, if the temperature is lower than + * throttle_end. Due to a delay between applying throttling and + * a reduced temperature this value should be less than throttle_begin. + * A value equal to throttle_begin provides only a very little hysteresis. + * throttle_full_stop: + * If the temperatrue is greater or equal to throttle_full_stop, + * full throttling is applied to the cpu or spu. This value should be + * greater than throttle_begin and throttle_end. Setting this value to + * 65 prevents the unit from running code at all. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Christian Krafft + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "spu_priv1_mmio.h" + +#define TEMP_MIN 65 +#define TEMP_MAX 125 + +#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \ +struct device_attribute attr_ ## _prefix ## _ ## _name = { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _prefix ## _show_ ## _name, \ + .store = _prefix ## _store_ ## _name, \ +}; + +static inline u8 reg_to_temp(u8 reg_value) +{ + return ((reg_value & 0x3f) << 1) + TEMP_MIN; +} + +static inline u8 temp_to_reg(u8 temp) +{ + return ((temp - TEMP_MIN) >> 1) & 0x3f; +} + +static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev) +{ + struct spu *spu; + + spu = container_of(dev, struct spu, dev); + + return cbe_get_pmd_regs(spu_devnode(spu)); +} + +/* returns the value for a given spu in a given register */ +static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg) +{ + union spe_reg value; + struct spu *spu; + + spu = container_of(dev, struct spu, dev); + value.val = in_be64(®->val); + + return value.spe[spu->spe_id]; +} + +static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr, + char *buf) +{ + u8 value; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = get_pmd_regs(dev); + + value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1); + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + +static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos) +{ + u64 value; + + value = in_be64(&pmd_regs->tm_tpr.val); + /* access the corresponding byte */ + value >>= pos; + value &= 0x3F; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + +static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos) +{ + u64 reg_value; + unsigned int temp; + u64 new_value; + int ret; + + ret = sscanf(buf, "%u", &temp); + + if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX) + return -EINVAL; + + new_value = temp_to_reg(temp); + + reg_value = in_be64(&pmd_regs->tm_tpr.val); + + /* zero out bits for new value */ + reg_value &= ~(0xffull << pos); + /* set bits to new value */ + reg_value |= new_value << pos; + + out_be64(&pmd_regs->tm_tpr.val, reg_value); + return size; +} + +static ssize_t spu_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 0); +} + +static ssize_t spu_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 8); +} + +static ssize_t spu_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(get_pmd_regs(dev), buf, 16); +} + +static ssize_t spu_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 0); +} + +static ssize_t spu_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 8); +} + +static ssize_t spu_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(get_pmd_regs(dev), buf, size, 16); +} + +static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + u64 value; + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + value = in_be64(&pmd_regs->ts_ctsr2); + + value = (value >> pos) & 0x3f; + + return sprintf(buf, "%d\n", reg_to_temp(value)); +} + + +/* shows the temperature of the DTS on the PPE, + * located near the linear thermal sensor */ +static ssize_t ppe_show_temp0(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 32); +} + +/* shows the temperature of the second DTS on the PPE */ +static ssize_t ppe_show_temp1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return ppe_show_temp(dev, buf, 0); +} + +static ssize_t ppe_show_throttle_end(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32); +} + +static ssize_t ppe_show_throttle_begin(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40); +} + +static ssize_t ppe_show_throttle_full_stop(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48); +} + +static ssize_t ppe_store_throttle_end(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32); +} + +static ssize_t ppe_store_throttle_begin(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40); +} + +static ssize_t ppe_store_throttle_full_stop(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48); +} + + +static struct device_attribute attr_spu_temperature = { + .attr = {.name = "temperature", .mode = 0400 }, + .show = spu_show_temp, +}; + +static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600); + + +static struct attribute *spu_attributes[] = { + &attr_spu_temperature.attr, + &attr_spu_throttle_end.attr, + &attr_spu_throttle_begin.attr, + &attr_spu_throttle_full_stop.attr, + NULL, +}; + +static const struct attribute_group spu_attribute_group = { + .name = "thermal", + .attrs = spu_attributes, +}; + +static struct device_attribute attr_ppe_temperature0 = { + .attr = {.name = "temperature0", .mode = 0400 }, + .show = ppe_show_temp0, +}; + +static struct device_attribute attr_ppe_temperature1 = { + .attr = {.name = "temperature1", .mode = 0400 }, + .show = ppe_show_temp1, +}; + +static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600); +static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600); + +static struct attribute *ppe_attributes[] = { + &attr_ppe_temperature0.attr, + &attr_ppe_temperature1.attr, + &attr_ppe_throttle_end.attr, + &attr_ppe_throttle_begin.attr, + &attr_ppe_throttle_full_stop.attr, + NULL, +}; + +static struct attribute_group ppe_attribute_group = { + .name = "thermal", + .attrs = ppe_attributes, +}; + +/* + * initialize throttling with default values + */ +static int __init init_default_values(void) +{ + int cpu; + struct cbe_pmd_regs __iomem *pmd_regs; + struct device *dev; + union ppe_spe_reg tpr; + union spe_reg str1; + u64 str2; + union spe_reg cr1; + u64 cr2; + + /* TPR defaults */ + /* ppe + * 1F - no full stop + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees */ + tpr.ppe = 0x1F0803; + /* spe + * 10 - full stopped when over 96 degrees + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees + */ + tpr.spe = 0x100803; + + /* STR defaults */ + /* str1 + * 10 - stop 16 of 32 cycles + */ + str1.val = 0x1010101010101010ull; + /* str2 + * 10 - stop 16 of 32 cycles + */ + str2 = 0x10; + + /* CR defaults */ + /* cr1 + * 4 - normal operation + */ + cr1.val = 0x0404040404040404ull; + /* cr2 + * 4 - normal operation + */ + cr2 = 0x04; + + for_each_possible_cpu (cpu) { + pr_debug("processing cpu %d\n", cpu); + dev = get_cpu_device(cpu); + + if (!dev) { + pr_info("invalid dev pointer for cbe_thermal\n"); + return -EINVAL; + } + + pmd_regs = cbe_get_cpu_pmd_regs(dev->id); + + if (!pmd_regs) { + pr_info("invalid CBE regs pointer for cbe_thermal\n"); + return -EINVAL; + } + + out_be64(&pmd_regs->tm_str2, str2); + out_be64(&pmd_regs->tm_str1.val, str1.val); + out_be64(&pmd_regs->tm_tpr.val, tpr.val); + out_be64(&pmd_regs->tm_cr1.val, cr1.val); + out_be64(&pmd_regs->tm_cr2, cr2); + } + + return 0; +} + + +static int __init thermal_init(void) +{ + int rc = init_default_values(); + + if (rc == 0) { + spu_add_dev_attr_group(&spu_attribute_group); + cpu_add_dev_attr_group(&ppe_attribute_group); + } + + return rc; +} +module_init(thermal_init); + +static void __exit thermal_exit(void) +{ + spu_remove_dev_attr_group(&spu_attribute_group); + cpu_remove_dev_attr_group(&ppe_attribute_group); +} +module_exit(thermal_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft "); + diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h new file mode 100644 index 0000000000..d5142e905a --- /dev/null +++ b/arch/powerpc/platforms/cell/cell.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Cell Platform common data structures + * + * Copyright 2015, Daniel Axtens, IBM Corporation + */ + +#ifndef CELL_H +#define CELL_H + +#include + +extern struct pci_controller_ops cell_pci_controller_ops; + +#endif diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c new file mode 100644 index 0000000000..ca7849e113 --- /dev/null +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * spu aware cpufreq governor for the cell processor + * + * © Copyright IBM Corporation 2006-2008 + * + * Author: Christian Krafft + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define POLL_TIME 100000 /* in µs */ +#define EXP 753 /* exp(-1) in fixed-point */ + +struct spu_gov_info_struct { + unsigned long busy_spus; /* fixed-point */ + struct cpufreq_policy *policy; + struct delayed_work work; + unsigned int poll_int; /* µs */ +}; +static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info); + +static int calc_freq(struct spu_gov_info_struct *info) +{ + int cpu; + int busy_spus; + + cpu = info->policy->cpu; + busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus); + + info->busy_spus = calc_load(info->busy_spus, EXP, busy_spus * FIXED_1); + pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n", + cpu, busy_spus, info->busy_spus); + + return info->policy->max * info->busy_spus / FIXED_1; +} + +static void spu_gov_work(struct work_struct *work) +{ + struct spu_gov_info_struct *info; + int delay; + unsigned long target_freq; + + info = container_of(work, struct spu_gov_info_struct, work.work); + + /* after cancel_delayed_work_sync we unset info->policy */ + BUG_ON(info->policy == NULL); + + target_freq = calc_freq(info); + __cpufreq_driver_target(info->policy, target_freq, CPUFREQ_RELATION_H); + + delay = usecs_to_jiffies(info->poll_int); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_init_work(struct spu_gov_info_struct *info) +{ + int delay = usecs_to_jiffies(info->poll_int); + INIT_DEFERRABLE_WORK(&info->work, spu_gov_work); + schedule_delayed_work_on(info->policy->cpu, &info->work, delay); +} + +static void spu_gov_cancel_work(struct spu_gov_info_struct *info) +{ + cancel_delayed_work_sync(&info->work); +} + +static int spu_gov_start(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + struct spu_gov_info_struct *affected_info; + int i; + + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + return -EINVAL; + } + + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + return -EINVAL; + } + + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } + + info->poll_int = POLL_TIME; + + /* setup timer */ + spu_gov_init_work(info); + + return 0; +} + +static void spu_gov_stop(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + int i; + + /* cancel timer */ + spu_gov_cancel_work(info); + + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; + } +} + +static struct cpufreq_governor spu_governor = { + .name = "spudemand", + .start = spu_gov_start, + .stop = spu_gov_stop, + .owner = THIS_MODULE, +}; +cpufreq_governor_init(spu_governor); +cpufreq_governor_exit(spu_governor); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft "); diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c new file mode 100644 index 0000000000..03ee8152ee --- /dev/null +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cell Internal Interrupt Controller + * + * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * IBM, Corp. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + * + * TODO: + * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers + * vs node numbers in the setup code + * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from + * a non-active node to the active node) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "interrupt.h" + +struct iic { + struct cbe_iic_thread_regs __iomem *regs; + u8 target_id; + u8 eoi_stack[16]; + int eoi_ptr; + struct device_node *node; +}; + +static DEFINE_PER_CPU(struct iic, cpu_iic); +#define IIC_NODE_COUNT 2 +static struct irq_domain *iic_host; + +/* Convert between "pending" bits and hw irq number */ +static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits) +{ + unsigned char unit = bits.source & 0xf; + unsigned char node = bits.source >> 4; + unsigned char class = bits.class & 3; + + /* Decode IPIs */ + if (bits.flags & CBE_IIC_IRQ_IPI) + return IIC_IRQ_TYPE_IPI | (bits.prio >> 4); + else + return (node << IIC_IRQ_NODE_SHIFT) | (class << 4) | unit; +} + +static void iic_mask(struct irq_data *d) +{ +} + +static void iic_unmask(struct irq_data *d) +{ +} + +static void iic_eoi(struct irq_data *d) +{ + struct iic *iic = this_cpu_ptr(&cpu_iic); + out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); + BUG_ON(iic->eoi_ptr < 0); +} + +static struct irq_chip iic_chip = { + .name = "CELL-IIC", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_eoi, +}; + + +static void iic_ioexc_eoi(struct irq_data *d) +{ +} + +static void iic_ioexc_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct cbe_iic_regs __iomem *node_iic = + (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int irq = irq_desc_get_irq(desc); + unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; + unsigned long bits, ack; + int cascade; + + for (;;) { + bits = in_be64(&node_iic->iic_is); + if (bits == 0) + break; + /* pre-ack edge interrupts */ + ack = bits & IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + /* handle them */ + for (cascade = 63; cascade >= 0; cascade--) + if (bits & (0x8000000000000000UL >> cascade)) + generic_handle_domain_irq(iic_host, + base | cascade); + /* post-ack level interrupts */ + ack = bits & ~IIC_ISR_EDGE_MASK; + if (ack) + out_be64(&node_iic->iic_is, ack); + } + chip->irq_eoi(&desc->irq_data); +} + + +static struct irq_chip iic_ioexc_chip = { + .name = "CELL-IOEX", + .irq_mask = iic_mask, + .irq_unmask = iic_unmask, + .irq_eoi = iic_ioexc_eoi, +}; + +/* Get an IRQ number from the pending state register of the IIC */ +static unsigned int iic_get_irq(void) +{ + struct cbe_iic_pending_bits pending; + struct iic *iic; + unsigned int virq; + + iic = this_cpu_ptr(&cpu_iic); + *(unsigned long *) &pending = + in_be64((u64 __iomem *) &iic->regs->pending_destr); + if (!(pending.flags & CBE_IIC_IRQ_VALID)) + return 0; + virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending)); + if (!virq) + return 0; + iic->eoi_stack[++iic->eoi_ptr] = pending.prio; + BUG_ON(iic->eoi_ptr > 15); + return virq; +} + +void iic_setup_cpu(void) +{ + out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff); +} + +u8 iic_get_target_id(int cpu) +{ + return per_cpu(cpu_iic, cpu).target_id; +} + +EXPORT_SYMBOL_GPL(iic_get_target_id); + +#ifdef CONFIG_SMP + +/* Use the highest interrupt priorities for IPI */ +static inline int iic_msg_to_irq(int msg) +{ + return IIC_IRQ_TYPE_IPI + 0xf - msg; +} + +void iic_message_pass(int cpu, int msg) +{ + out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); +} + +static void iic_request_ipi(int msg) +{ + int virq; + + virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); + if (!virq) { + printk(KERN_ERR + "iic: failed to map IPI %s\n", smp_ipi_name[msg]); + return; + } + + /* + * If smp_request_message_ipi encounters an error it will notify + * the error. If a message is not needed it will return non-zero. + */ + if (smp_request_message_ipi(virq, msg)) + irq_dispose_mapping(virq); +} + +void iic_request_IPIs(void) +{ + iic_request_ipi(PPC_MSG_CALL_FUNCTION); + iic_request_ipi(PPC_MSG_RESCHEDULE); + iic_request_ipi(PPC_MSG_TICK_BROADCAST); + iic_request_ipi(PPC_MSG_NMI_IPI); +} + +#endif /* CONFIG_SMP */ + + +static int iic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return of_device_is_compatible(node, + "IBM,CBEA-Internal-Interrupt-Controller"); +} + +static int iic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + switch (hw & IIC_IRQ_TYPE_MASK) { + case IIC_IRQ_TYPE_IPI: + irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq); + break; + case IIC_IRQ_TYPE_IOEXC: + irq_set_chip_and_handler(virq, &iic_ioexc_chip, + handle_edge_eoi_irq); + break; + default: + irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq); + } + return 0; +} + +static int iic_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + unsigned int node, ext, unit, class; + const u32 *val; + + if (!of_device_is_compatible(ct, + "IBM,CBEA-Internal-Interrupt-Controller")) + return -ENODEV; + if (intsize != 1) + return -ENODEV; + val = of_get_property(ct, "#interrupt-cells", NULL); + if (val == NULL || *val != 1) + return -ENODEV; + + node = intspec[0] >> 24; + ext = (intspec[0] >> 16) & 0xff; + class = (intspec[0] >> 8) & 0xff; + unit = intspec[0] & 0xff; + + /* Check if node is in supported range */ + if (node > 1) + return -EINVAL; + + /* Build up interrupt number, special case for IO exceptions */ + *out_hwirq = (node << IIC_IRQ_NODE_SHIFT); + if (unit == IIC_UNIT_IIC && class == 1) + *out_hwirq |= IIC_IRQ_TYPE_IOEXC | ext; + else + *out_hwirq |= IIC_IRQ_TYPE_NORMAL | + (class << IIC_IRQ_CLASS_SHIFT) | unit; + + /* Dummy flags, ignored by iic code */ + *out_flags = IRQ_TYPE_EDGE_RISING; + + return 0; +} + +static const struct irq_domain_ops iic_host_ops = { + .match = iic_host_match, + .map = iic_host_map, + .xlate = iic_host_xlate, +}; + +static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr, + struct device_node *node) +{ + /* XXX FIXME: should locate the linux CPU number from the HW cpu + * number properly. We are lucky for now + */ + struct iic *iic = &per_cpu(cpu_iic, hw_cpu); + + iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs)); + BUG_ON(iic->regs == NULL); + + iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe); + iic->eoi_stack[0] = 0xff; + iic->node = of_node_get(node); + out_be64(&iic->regs->prio, 0); + + printk(KERN_INFO "IIC for CPU %d target id 0x%x : %pOF\n", + hw_cpu, iic->target_id, node); +} + +static int __init setup_iic(void) +{ + struct device_node *dn; + struct resource r0, r1; + unsigned int node, cascade, found = 0; + struct cbe_iic_regs __iomem *node_iic; + const u32 *np; + + for_each_node_by_name(dn, "interrupt-controller") { + if (!of_device_is_compatible(dn, + "IBM,CBEA-Internal-Interrupt-Controller")) + continue; + np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL); + if (np == NULL) { + printk(KERN_WARNING "IIC: CPU association not found\n"); + of_node_put(dn); + return -ENODEV; + } + if (of_address_to_resource(dn, 0, &r0) || + of_address_to_resource(dn, 1, &r1)) { + printk(KERN_WARNING "IIC: Can't resolve addresses\n"); + of_node_put(dn); + return -ENODEV; + } + found++; + init_one_iic(np[0], r0.start, dn); + init_one_iic(np[1], r1.start, dn); + + /* Setup cascade for IO exceptions. XXX cleanup tricks to get + * node vs CPU etc... + * Note that we configure the IIC_IRR here with a hard coded + * priority of 1. We might want to improve that later. + */ + node = np[0] >> 1; + node_iic = cbe_get_cpu_iic_regs(np[0]); + cascade = node << IIC_IRQ_NODE_SHIFT; + cascade |= 1 << IIC_IRQ_CLASS_SHIFT; + cascade |= IIC_UNIT_IIC; + cascade = irq_create_mapping(iic_host, cascade); + if (!cascade) + continue; + /* + * irq_data is a generic pointer that gets passed back + * to us later, so the forced cast is fine. + */ + irq_set_handler_data(cascade, (void __force *)node_iic); + irq_set_chained_handler(cascade, iic_ioexc_cascade); + out_be64(&node_iic->iic_ir, + (1 << 12) /* priority */ | + (node << 4) /* dest node */ | + IIC_UNIT_THREAD_0 /* route them to thread 0 */); + /* Flush pending (make sure it triggers if there is + * anything pending + */ + out_be64(&node_iic->iic_is, 0xfffffffffffffffful); + } + + if (found) + return 0; + else + return -ENODEV; +} + +void __init iic_init_IRQ(void) +{ + /* Setup an irq host data structure */ + iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops, + NULL); + BUG_ON(iic_host == NULL); + irq_set_default_host(iic_host); + + /* Discover and initialize iics */ + if (setup_iic() < 0) + panic("IIC: Failed to initialize !\n"); + + /* Set master interrupt handling function */ + ppc_md.get_irq = iic_get_irq; + + /* Enable on current CPU */ + iic_setup_cpu(); +} + +void iic_set_interrupt_routing(int cpu, int thread, int priority) +{ + struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu); + u64 iic_ir = 0; + int node = cpu >> 1; + + /* Set which node and thread will handle the next interrupt */ + iic_ir |= CBE_IIC_IR_PRIO(priority) | + CBE_IIC_IR_DEST_NODE(node); + if (thread == 0) + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0); + else + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1); + out_be64(&iic_regs->iic_ir, iic_ir); +} diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h new file mode 100644 index 0000000000..a479022485 --- /dev/null +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_CELL_PIC_H +#define ASM_CELL_PIC_H +#ifdef __KERNEL__ +/* + * Mapping of IIC pending bits into per-node interrupt numbers. + * + * Interrupt numbers are in the range 0...0x1ff where the top bit + * (0x100) represent the source node. Only 2 nodes are supported with + * the current code though it's trivial to extend that if necessary using + * higher level bits + * + * The bottom 8 bits are split into 2 type bits and 6 data bits that + * depend on the type: + * + * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source + * 01 (0x40 | data) : IO exception. data is the exception number as + * defined by bit numbers in IIC_SR + * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority) + * and node is always 0 (IPIs are per-cpu, their source is + * not relevant) + * 11 (0xc0 | data) : reserved + * + * In addition, interrupt number 0x80000000 is defined as always invalid + * (that is the node field is expected to never extend to move than 23 bits) + * + */ + +enum { + IIC_IRQ_INVALID = 0x80000000u, + IIC_IRQ_NODE_MASK = 0x100, + IIC_IRQ_NODE_SHIFT = 8, + IIC_IRQ_MAX = 0x1ff, + IIC_IRQ_TYPE_MASK = 0xc0, + IIC_IRQ_TYPE_NORMAL = 0x00, + IIC_IRQ_TYPE_IOEXC = 0x40, + IIC_IRQ_TYPE_IPI = 0x80, + IIC_IRQ_CLASS_SHIFT = 4, + IIC_IRQ_CLASS_0 = 0x00, + IIC_IRQ_CLASS_1 = 0x10, + IIC_IRQ_CLASS_2 = 0x20, + IIC_SOURCE_COUNT = 0x200, + + /* Here are defined the various source/dest units. Avoid using those + * definitions if you can, they are mostly here for reference + */ + IIC_UNIT_SPU_0 = 0x4, + IIC_UNIT_SPU_1 = 0x7, + IIC_UNIT_SPU_2 = 0x3, + IIC_UNIT_SPU_3 = 0x8, + IIC_UNIT_SPU_4 = 0x2, + IIC_UNIT_SPU_5 = 0x9, + IIC_UNIT_SPU_6 = 0x1, + IIC_UNIT_SPU_7 = 0xa, + IIC_UNIT_IOC_0 = 0x0, + IIC_UNIT_IOC_1 = 0xb, + IIC_UNIT_THREAD_0 = 0xe, /* target only */ + IIC_UNIT_THREAD_1 = 0xf, /* target only */ + IIC_UNIT_IIC = 0xe, /* source only (IO exceptions) */ + + /* Base numbers for the external interrupts */ + IIC_IRQ_EXT_IOIF0 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0, + IIC_IRQ_EXT_IOIF1 = + IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1, + + /* Base numbers for the IIC_ISR interrupts */ + IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63, + IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62, + IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61, + IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60, + IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59, + + /* Which bits in IIC_ISR are edge sensitive */ + IIC_ISR_EDGE_MASK = 0x4ul, +}; + +extern void iic_init_IRQ(void); +extern void iic_message_pass(int cpu, int msg); +extern void iic_request_IPIs(void); +extern void iic_setup_cpu(void); + +extern u8 iic_get_target_id(int cpu); + +extern void spider_init_IRQ(void); + +extern void iic_set_interrupt_routing(int cpu, int thread, int priority); + +#endif +#endif /* ASM_CELL_PIC_H */ diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c new file mode 100644 index 0000000000..1202a69b0a --- /dev/null +++ b/arch/powerpc/platforms/cell/iommu.c @@ -0,0 +1,1094 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IOMMU implementation for Cell Broadband Processor Architecture + * + * (C) Copyright IBM Corporation 2006-2008 + * + * Author: Jeremy Kerr + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "cell.h" +#include "interrupt.h" + +/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages + * instead of leaving them mapped to some dummy page. This can be + * enabled once the appropriate workarounds for spider bugs have + * been enabled + */ +#define CELL_IOMMU_REAL_UNMAP + +/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of + * IO PTEs based on the transfer direction. That can be enabled + * once spider-net has been fixed to pass the correct direction + * to the DMA mapping functions + */ +#define CELL_IOMMU_STRICT_PROTECTION + + +#define NR_IOMMUS 2 + +/* IOC mmap registers */ +#define IOC_Reg_Size 0x2000 + +#define IOC_IOPT_CacheInvd 0x908 +#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul +#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul +#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul + +#define IOC_IOST_Origin 0x918 +#define IOC_IOST_Origin_E 0x8000000000000000ul +#define IOC_IOST_Origin_HW 0x0000000000000800ul +#define IOC_IOST_Origin_HL 0x0000000000000400ul + +#define IOC_IO_ExcpStat 0x920 +#define IOC_IO_ExcpStat_V 0x8000000000000000ul +#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul +#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul +#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul +#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful + +#define IOC_IO_ExcpMask 0x928 +#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul +#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul + +#define IOC_IOCmd_Offset 0x1000 + +#define IOC_IOCmd_Cfg 0xc00 +#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul + + +/* Segment table entries */ +#define IOSTE_V 0x8000000000000000ul /* valid */ +#define IOSTE_H 0x4000000000000000ul /* cache hint */ +#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */ +#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */ +#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */ +#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */ +#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */ +#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */ +#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */ + + +/* IOMMU sizing */ +#define IO_SEGMENT_SHIFT 28 +#define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift)) + +/* The high bit needs to be set on every DMA address */ +#define SPIDER_DMA_OFFSET 0x80000000ul + +struct iommu_window { + struct list_head list; + struct cbe_iommu *iommu; + unsigned long offset; + unsigned long size; + unsigned int ioid; + struct iommu_table table; +}; + +#define NAMESIZE 8 +struct cbe_iommu { + int nid; + char name[NAMESIZE]; + void __iomem *xlate_regs; + void __iomem *cmd_regs; + unsigned long *stab; + unsigned long *ptab; + void *pad_page; + struct list_head windows; +}; + +/* Static array of iommus, one per node + * each contains a list of windows, keyed from dma_window property + * - on bus setup, look for a matching window, or create one + * - on dev setup, assign iommu_table ptr + */ +static struct cbe_iommu iommus[NR_IOMMUS]; +static int cbe_nr_iommus; + +static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, + long n_ptes) +{ + u64 __iomem *reg; + u64 val; + long n; + + reg = iommu->xlate_regs + IOC_IOPT_CacheInvd; + + while (n_ptes > 0) { + /* we can invalidate up to 1 << 11 PTEs at once */ + n = min(n_ptes, 1l << 11); + val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask) + | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask) + | IOC_IOPT_CacheInvd_Busy; + + out_be64(reg, val); + while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy) + ; + + n_ptes -= n; + pte += n; + } +} + +static int tce_build_cell(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs) +{ + int i; + unsigned long *io_pte, base_pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + /* implementing proper protection causes problems with the spidernet + * driver - check mapping directions later, but allow read & write by + * default for now.*/ +#ifdef CELL_IOMMU_STRICT_PROTECTION + /* to avoid referencing a global, we use a trick here to setup the + * protection bit. "prot" is setup to be 3 fields of 4 bits appended + * together for each of the 3 supported direction values. It is then + * shifted left so that the fields matching the desired direction + * lands on the appropriate bits, and other bits are masked out. + */ + const unsigned long prot = 0xc48; + base_pte = + ((prot << (52 + 4 * direction)) & + (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) | + CBE_IOPTE_M | CBE_IOPTE_SO_RW | + (window->ioid & CBE_IOPTE_IOID_Mask); +#else + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING)) + base_pte &= ~CBE_IOPTE_SO_RW; + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift)) + io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask); + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); + + pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", + index, npages, direction, base_pte); + return 0; +} + +static void tce_free_cell(struct iommu_table *tbl, long index, long npages) +{ + + int i; + unsigned long *io_pte, pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages); + +#ifdef CELL_IOMMU_REAL_UNMAP + pte = 0; +#else + /* spider bridge does PCI reads after freeing - insert a mapping + * to a scratch page instead of an invalid entry */ + pte = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW | + __pa(window->iommu->pad_page) | + (window->ioid & CBE_IOPTE_IOID_Mask); +#endif + + io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset); + + for (i = 0; i < npages; i++) + io_pte[i] = pte; + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); +} + +static irqreturn_t ioc_interrupt(int irq, void *data) +{ + unsigned long stat, spf; + struct cbe_iommu *iommu = data; + + stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + spf = stat & IOC_IO_ExcpStat_SPF_Mask; + + /* Might want to rate limit it */ + printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat); + printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n", + !!(stat & IOC_IO_ExcpStat_V), + (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ', + (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ', + (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write", + (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask)); + printk(KERN_ERR " page=0x%016lx\n", + stat & IOC_IO_ExcpStat_ADDR_Mask); + + /* clear interrupt */ + stat &= ~IOC_IO_ExcpStat_V; + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat); + + return IRQ_HANDLED; +} + +static int __init cell_iommu_find_ioc(int nid, unsigned long *base) +{ + struct device_node *np; + struct resource r; + + *base = 0; + + /* First look for new style /be nodes */ + for_each_node_by_name(np, "ioc") { + if (of_node_to_nid(np) != nid) + continue; + if (of_address_to_resource(np, 0, &r)) { + printk(KERN_ERR "iommu: can't get address for %pOF\n", + np); + continue; + } + *base = r.start; + of_node_put(np); + return 0; + } + + /* Ok, let's try the old way */ + for_each_node_by_type(np, "cpu") { + const unsigned int *nidp; + const unsigned long *tmp; + + nidp = of_get_property(np, "node-id", NULL); + if (nidp && *nidp == nid) { + tmp = of_get_property(np, "ioc-translation", NULL); + if (tmp) { + *base = *tmp; + of_node_put(np); + return 0; + } + } + } + + return -ENODEV; +} + +static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu, + unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) +{ + struct page *page; + unsigned long segments, stab_size; + + segments = max(dbase + dsize, fbase + fsize) >> IO_SEGMENT_SHIFT; + + pr_debug("%s: iommu[%d]: segments: %lu\n", + __func__, iommu->nid, segments); + + /* set up the segment table */ + stab_size = segments * sizeof(unsigned long); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(stab_size)); + BUG_ON(!page); + iommu->stab = page_address(page); + memset(iommu->stab, 0, stab_size); +} + +static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu, + unsigned long base, unsigned long size, unsigned long gap_base, + unsigned long gap_size, unsigned long page_shift) +{ + struct page *page; + int i; + unsigned long reg, segments, pages_per_segment, ptab_size, + n_pte_pages, start_seg, *ptab; + + start_seg = base >> IO_SEGMENT_SHIFT; + segments = size >> IO_SEGMENT_SHIFT; + pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift); + /* PTEs for each segment must start on a 4K boundary */ + pages_per_segment = max(pages_per_segment, + (1 << 12) / sizeof(unsigned long)); + + ptab_size = segments * pages_per_segment * sizeof(unsigned long); + pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__, + iommu->nid, ptab_size, get_order(ptab_size)); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size)); + BUG_ON(!page); + + ptab = page_address(page); + memset(ptab, 0, ptab_size); + + /* number of 4K pages needed for a page table */ + n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12; + + pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n", + __func__, iommu->nid, iommu->stab, ptab, + n_pte_pages); + + /* initialise the STEs */ + reg = IOSTE_V | ((n_pte_pages - 1) << 5); + + switch (page_shift) { + case 12: reg |= IOSTE_PS_4K; break; + case 16: reg |= IOSTE_PS_64K; break; + case 20: reg |= IOSTE_PS_1M; break; + case 24: reg |= IOSTE_PS_16M; break; + default: BUG(); + } + + gap_base = gap_base >> IO_SEGMENT_SHIFT; + gap_size = gap_size >> IO_SEGMENT_SHIFT; + + pr_debug("Setting up IOMMU stab:\n"); + for (i = start_seg; i < (start_seg + segments); i++) { + if (i >= gap_base && i < (gap_base + gap_size)) { + pr_debug("\toverlap at %d, skipping\n", i); + continue; + } + iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) * + (i - start_seg)); + pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]); + } + + return ptab; +} + +static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu) +{ + int ret; + unsigned long reg, xlate_base; + unsigned int virq; + + if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) + panic("%s: missing IOC register mappings for node %d\n", + __func__, iommu->nid); + + iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); + iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; + + /* ensure that the STEs have updated */ + mb(); + + /* setup interrupts for the iommu. */ + reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, + reg & ~IOC_IO_ExcpStat_V); + out_be64(iommu->xlate_regs + IOC_IO_ExcpMask, + IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE); + + virq = irq_create_mapping(NULL, + IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT)); + BUG_ON(!virq); + + ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu); + BUG_ON(ret); + + /* set the IOC segment table origin register (and turn on the iommu) */ + reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW; + out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg); + in_be64(iommu->xlate_regs + IOC_IOST_Origin); + + /* turn on IO translation */ + reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE; + out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); +} + +static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, + unsigned long base, unsigned long size) +{ + cell_iommu_setup_stab(iommu, base, size, 0, 0); + iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_enable_hardware(iommu); +} + +#if 0/* Unused for now */ +static struct iommu_window *find_window(struct cbe_iommu *iommu, + unsigned long offset, unsigned long size) +{ + struct iommu_window *window; + + /* todo: check for overlapping (but not equal) windows) */ + + list_for_each_entry(window, &(iommu->windows), list) { + if (window->offset == offset && window->size == size) + return window; + } + + return NULL; +} +#endif + +static inline u32 cell_iommu_get_ioid(struct device_node *np) +{ + const u32 *ioid; + + ioid = of_get_property(np, "ioid", NULL); + if (ioid == NULL) { + printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n", + np); + return 0; + } + + return *ioid; +} + +static struct iommu_table_ops cell_iommu_ops = { + .set = tce_build_cell, + .clear = tce_free_cell +}; + +static struct iommu_window * __init +cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, + unsigned long offset, unsigned long size, + unsigned long pte_offset) +{ + struct iommu_window *window; + struct page *page; + u32 ioid; + + ioid = cell_iommu_get_ioid(np); + + window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid); + BUG_ON(window == NULL); + + window->offset = offset; + window->size = size; + window->ioid = ioid; + window->iommu = iommu; + + window->table.it_blocksize = 16; + window->table.it_base = (unsigned long)iommu->ptab; + window->table.it_index = iommu->nid; + window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; + window->table.it_offset = + (offset >> window->table.it_page_shift) + pte_offset; + window->table.it_size = size >> window->table.it_page_shift; + window->table.it_ops = &cell_iommu_ops; + + if (!iommu_init_table(&window->table, iommu->nid, 0, 0)) + panic("Failed to initialize iommu table"); + + pr_debug("\tioid %d\n", window->ioid); + pr_debug("\tblocksize %ld\n", window->table.it_blocksize); + pr_debug("\tbase 0x%016lx\n", window->table.it_base); + pr_debug("\toffset 0x%lx\n", window->table.it_offset); + pr_debug("\tsize %ld\n", window->table.it_size); + + list_add(&window->list, &iommu->windows); + + if (offset != 0) + return window; + + /* We need to map and reserve the first IOMMU page since it's used + * by the spider workaround. In theory, we only need to do that when + * running on spider but it doesn't really matter. + * + * This code also assumes that we have a window that starts at 0, + * which is the case on all spider based blades. + */ + page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + BUG_ON(!page); + iommu->pad_page = page_address(page); + clear_page(iommu->pad_page); + + __set_bit(0, window->table.it_map); + tce_build_cell(&window->table, window->table.it_offset, 1, + (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0); + + return window; +} + +static struct cbe_iommu *cell_iommu_for_node(int nid) +{ + int i; + + for (i = 0; i < cbe_nr_iommus; i++) + if (iommus[i].nid == nid) + return &iommus[i]; + return NULL; +} + +static unsigned long cell_dma_nommu_offset; + +static unsigned long dma_iommu_fixed_base; +static bool cell_iommu_enabled; + +/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak */ +bool iommu_fixed_is_weak; + +static struct iommu_table *cell_get_iommu_table(struct device *dev) +{ + struct iommu_window *window; + struct cbe_iommu *iommu; + + /* Current implementation uses the first window available in that + * node's iommu. We -might- do something smarter later though it may + * never be necessary + */ + iommu = cell_iommu_for_node(dev_to_node(dev)); + if (iommu == NULL || list_empty(&iommu->windows)) { + dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n", + dev->of_node, dev_to_node(dev)); + return NULL; + } + window = list_entry(iommu->windows.next, struct iommu_window, list); + + return &window->table; +} + +static u64 cell_iommu_get_fixed_address(struct device *dev); + +static void cell_dma_dev_setup(struct device *dev) +{ + if (cell_iommu_enabled) { + u64 addr = cell_iommu_get_fixed_address(dev); + + if (addr != OF_BAD_ADDR) + dev->archdata.dma_offset = addr + dma_iommu_fixed_base; + set_iommu_table_base(dev, cell_get_iommu_table(dev)); + } else { + dev->archdata.dma_offset = cell_dma_nommu_offset; + } +} + +static void cell_pci_dma_dev_setup(struct pci_dev *dev) +{ + cell_dma_dev_setup(&dev->dev); +} + +static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; + + /* We are only interested in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + if (cell_iommu_enabled) + dev->dma_ops = &dma_iommu_ops; + cell_dma_dev_setup(dev); + return 0; +} + +static struct notifier_block cell_of_bus_notifier = { + .notifier_call = cell_of_bus_notify +}; + +static int __init cell_iommu_get_window(struct device_node *np, + unsigned long *base, + unsigned long *size) +{ + const __be32 *dma_window; + unsigned long index; + + /* Use ibm,dma-window if available, else, hard code ! */ + dma_window = of_get_property(np, "ibm,dma-window", NULL); + if (dma_window == NULL) { + *base = 0; + *size = 0x80000000u; + return -ENODEV; + } + + of_parse_dma_window(np, dma_window, &index, base, size); + return 0; +} + +static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np) +{ + struct cbe_iommu *iommu; + int nid, i; + + /* Get node ID */ + nid = of_node_to_nid(np); + if (nid < 0) { + printk(KERN_ERR "iommu: failed to get node for %pOF\n", + np); + return NULL; + } + pr_debug("iommu: setting up iommu for node %d (%pOF)\n", + nid, np); + + /* XXX todo: If we can have multiple windows on the same IOMMU, which + * isn't the case today, we probably want here to check whether the + * iommu for that node is already setup. + * However, there might be issue with getting the size right so let's + * ignore that for now. We might want to completely get rid of the + * multiple window support since the cell iommu supports per-page ioids + */ + + if (cbe_nr_iommus >= NR_IOMMUS) { + printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n", + np); + return NULL; + } + + /* Init base fields */ + i = cbe_nr_iommus++; + iommu = &iommus[i]; + iommu->stab = NULL; + iommu->nid = nid; + snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i); + INIT_LIST_HEAD(&iommu->windows); + + return iommu; +} + +static void __init cell_iommu_init_one(struct device_node *np, + unsigned long offset) +{ + struct cbe_iommu *iommu; + unsigned long base, size; + + iommu = cell_iommu_alloc(np); + if (!iommu) + return; + + /* Obtain a window for it */ + cell_iommu_get_window(np, &base, &size); + + pr_debug("\ttranslating window 0x%lx...0x%lx\n", + base, base + size - 1); + + /* Initialize the hardware */ + cell_iommu_setup_hardware(iommu, base, size); + + /* Setup the iommu_table */ + cell_iommu_setup_window(iommu, np, base, size, + offset >> IOMMU_PAGE_SHIFT_4K); +} + +static void __init cell_disable_iommus(void) +{ + int node; + unsigned long base, val; + void __iomem *xregs, *cregs; + + /* Make sure IOC translation is disabled on all nodes */ + for_each_online_node(node) { + if (cell_iommu_find_ioc(node, &base)) + continue; + xregs = ioremap(base, IOC_Reg_Size); + if (xregs == NULL) + continue; + cregs = xregs + IOC_IOCmd_Offset; + + pr_debug("iommu: cleaning up iommu on node %d\n", node); + + out_be64(xregs + IOC_IOST_Origin, 0); + (void)in_be64(xregs + IOC_IOST_Origin); + val = in_be64(cregs + IOC_IOCmd_Cfg); + val &= ~IOC_IOCmd_Cfg_TE; + out_be64(cregs + IOC_IOCmd_Cfg, val); + (void)in_be64(cregs + IOC_IOCmd_Cfg); + + iounmap(xregs); + } +} + +static int __init cell_iommu_init_disabled(void) +{ + struct device_node *np = NULL; + unsigned long base = 0, size; + + /* When no iommu is present, we use direct DMA ops */ + + /* First make sure all IOC translation is turned off */ + cell_disable_iommus(); + + /* If we have no Axon, we set up the spider DMA magic offset */ + np = of_find_node_by_name(NULL, "axon"); + if (!np) + cell_dma_nommu_offset = SPIDER_DMA_OFFSET; + of_node_put(np); + + /* Now we need to check to see where the memory is mapped + * in PCI space. We assume that all busses use the same dma + * window which is always the case so far on Cell, thus we + * pick up the first pci-internal node we can find and check + * the DMA window from there. + */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + if (np == NULL) { + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + } + of_node_put(np); + + /* If we found a DMA window, we check if it's big enough to enclose + * all of physical memory. If not, we force enable IOMMU + */ + if (np && size < memblock_end_of_DRAM()) { + printk(KERN_WARNING "iommu: force-enabled, dma window" + " (%ldMB) smaller than total memory (%lldMB)\n", + size >> 20, memblock_end_of_DRAM() >> 20); + return -ENODEV; + } + + cell_dma_nommu_offset += base; + + if (cell_dma_nommu_offset != 0) + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; + + printk("iommu: disabled, direct DMA offset is 0x%lx\n", + cell_dma_nommu_offset); + + return 0; +} + +/* + * Fixed IOMMU mapping support + * + * This code adds support for setting up a fixed IOMMU mapping on certain + * cell machines. For 64-bit devices this avoids the performance overhead of + * mapping and unmapping pages at runtime. 32-bit devices are unable to use + * the fixed mapping. + * + * The fixed mapping is established at boot, and maps all of physical memory + * 1:1 into device space at some offset. On machines with < 30 GB of memory + * we setup the fixed mapping immediately above the normal IOMMU window. + * + * For example a machine with 4GB of memory would end up with the normal + * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In + * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to + * 3GB, plus any offset required by firmware. The firmware offset is encoded + * in the "dma-ranges" property. + * + * On machines with 30GB or more of memory, we are unable to place the fixed + * mapping above the normal IOMMU window as we would run out of address space. + * Instead we move the normal IOMMU window to coincide with the hash page + * table, this region does not need to be part of the fixed mapping as no + * device should ever be DMA'ing to it. We then setup the fixed mapping + * from 0 to 32GB. + */ + +static u64 cell_iommu_get_fixed_address(struct device *dev) +{ + u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + struct device_node *np; + const u32 *ranges = NULL; + int i, len, best, naddr, nsize, pna, range_size; + + /* We can be called for platform devices that have no of_node */ + np = of_node_get(dev->of_node); + if (!np) + goto out; + + while (1) { + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + np = of_get_next_parent(np); + if (!np) + break; + + ranges = of_get_property(np, "dma-ranges", &len); + + /* Ignore empty ranges, they imply no translation required */ + if (ranges && len > 0) + break; + } + + if (!ranges) { + dev_dbg(dev, "iommu: no dma-ranges found\n"); + goto out; + } + + len /= sizeof(u32); + + pna = of_n_addr_cells(np); + range_size = naddr + nsize + pna; + + /* dma-ranges format: + * child addr : naddr cells + * parent addr : pna cells + * size : nsize cells + */ + for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { + cpu_addr = of_translate_dma_address(np, ranges + i + naddr); + size = of_read_number(ranges + i + naddr + pna, nsize); + + if (cpu_addr == 0 && size > best_size) { + best = i; + best_size = size; + } + } + + if (best >= 0) { + dev_addr = of_read_number(ranges + best, naddr); + } else + dev_dbg(dev, "iommu: no suitable range found!\n"); + +out: + of_node_put(np); + + return dev_addr; +} + +static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask) +{ + return mask == DMA_BIT_MASK(64) && + cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR; +} + +static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab, + unsigned long base_pte) +{ + unsigned long segment, offset; + + segment = addr >> IO_SEGMENT_SHIFT; + offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24)); + ptab = ptab + (segment * (1 << 12) / sizeof(unsigned long)); + + pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n", + addr, ptab, segment, offset); + + ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); +} + +static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, + struct device_node *np, unsigned long dbase, unsigned long dsize, + unsigned long fbase, unsigned long fsize) +{ + unsigned long base_pte, uaddr, ioaddr, *ptab; + + ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24); + + dma_iommu_fixed_base = fbase; + + pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase); + + base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M | + (cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask); + + if (iommu_fixed_is_weak) + pr_info("IOMMU: Using weak ordering for fixed mapping\n"); + else { + pr_info("IOMMU: Using strong ordering for fixed mapping\n"); + base_pte |= CBE_IOPTE_SO_RW; + } + + for (uaddr = 0; uaddr < fsize; uaddr += (1 << 24)) { + /* Don't touch the dynamic region */ + ioaddr = uaddr + fbase; + if (ioaddr >= dbase && ioaddr < (dbase + dsize)) { + pr_debug("iommu: fixed/dynamic overlap, skipping\n"); + continue; + } + + insert_16M_pte(uaddr, ptab, base_pte); + } + + mb(); +} + +static int __init cell_iommu_fixed_mapping_init(void) +{ + unsigned long dbase, dsize, fbase, fsize, hbase, hend; + struct cbe_iommu *iommu; + struct device_node *np; + + /* The fixed mapping is only supported on axon machines */ + np = of_find_node_by_name(NULL, "axon"); + of_node_put(np); + + if (!np) { + pr_debug("iommu: fixed mapping disabled, no axons found\n"); + return -1; + } + + /* We must have dma-ranges properties for fixed mapping to work */ + np = of_find_node_with_property(NULL, "dma-ranges"); + of_node_put(np); + + if (!np) { + pr_debug("iommu: no dma-ranges found, no fixed mapping\n"); + return -1; + } + + /* The default setup is to have the fixed mapping sit after the + * dynamic region, so find the top of the largest IOMMU window + * on any axon, then add the size of RAM and that's our max value. + * If that is > 32GB we have to do other shennanigans. + */ + fbase = 0; + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + fbase = max(fbase, dbase + dsize); + } + + fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT); + fsize = memblock_phys_mem_size(); + + if ((fbase + fsize) <= 0x800000000ul) + hbase = 0; /* use the device tree window */ + else { + /* If we're over 32 GB we need to cheat. We can't map all of + * RAM with the fixed mapping, and also fit the dynamic + * region. So try to place the dynamic region where the hash + * table sits, drivers never need to DMA to it, we don't + * need a fixed mapping for that area. + */ + if (!htab_address) { + pr_debug("iommu: htab is NULL, on LPAR? Huh?\n"); + return -1; + } + hbase = __pa(htab_address); + hend = hbase + htab_size_bytes; + + /* The window must start and end on a segment boundary */ + if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) || + (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) { + pr_debug("iommu: hash window not segment aligned\n"); + return -1; + } + + /* Check the hash window fits inside the real DMA window */ + for_each_node_by_name(np, "axon") { + cell_iommu_get_window(np, &dbase, &dsize); + + if (hbase < dbase || (hend > (dbase + dsize))) { + pr_debug("iommu: hash window doesn't fit in" + "real DMA window\n"); + of_node_put(np); + return -1; + } + } + + fbase = 0; + } + + /* Setup the dynamic regions */ + for_each_node_by_name(np, "axon") { + iommu = cell_iommu_alloc(np); + BUG_ON(!iommu); + + if (hbase == 0) + cell_iommu_get_window(np, &dbase, &dsize); + else { + dbase = hbase; + dsize = htab_size_bytes; + } + + printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx " + "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase, + dbase + dsize, fbase, fbase + fsize); + + cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize); + iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0, + IOMMU_PAGE_SHIFT_4K); + cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize, + fbase, fsize); + cell_iommu_enable_hardware(iommu); + cell_iommu_setup_window(iommu, np, dbase, dsize, 0); + } + + cell_pci_controller_ops.iommu_bypass_supported = + cell_pci_iommu_bypass_supported; + return 0; +} + +static int iommu_fixed_disabled; + +static int __init setup_iommu_fixed(char *str) +{ + struct device_node *pciep; + + if (strcmp(str, "off") == 0) + iommu_fixed_disabled = 1; + + /* If we can find a pcie-endpoint in the device tree assume that + * we're on a triblade or a CAB so by default the fixed mapping + * should be set to be weakly ordered; but only if the boot + * option WASN'T set for strong ordering + */ + pciep = of_find_node_by_type(NULL, "pcie-endpoint"); + + if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0)) + iommu_fixed_is_weak = true; + + of_node_put(pciep); + + return 1; +} +__setup("iommu_fixed=", setup_iommu_fixed); + +static int __init cell_iommu_init(void) +{ + struct device_node *np; + + /* If IOMMU is disabled or we have little enough RAM to not need + * to enable it, we setup a direct mapping. + * + * Note: should we make sure we have the IOMMU actually disabled ? + */ + if (iommu_is_off || + (!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull)) + if (cell_iommu_init_disabled() == 0) + goto bail; + + /* Setup various callbacks */ + cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup; + + if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0) + goto done; + + /* Create an iommu for each /axon node. */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, 0); + } + + /* Create an iommu for each toplevel /pci-internal node for + * old hardware/firmware + */ + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, SPIDER_DMA_OFFSET); + } + done: + /* Setup default PCI iommu ops */ + set_pci_dma_ops(&dma_iommu_ops); + cell_iommu_enabled = true; + bail: + /* Register callbacks on OF platform device addition/removal + * to handle linking them to the right DMA operations + */ + bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier); + + return 0; +} +machine_arch_initcall(cell, cell_iommu_init); diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c new file mode 100644 index 0000000000..58d967ee38 --- /dev/null +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * CBE Pervasive Monitor and Debug + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * Michael N. Day (mnday@us.ibm.com) + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pervasive.h" +#include "ras.h" + +static void cbe_power_save(void) +{ + unsigned long ctrl, thread_switch_control; + + /* Ensure our interrupt state is properly tracked */ + if (!prep_irq_for_idle()) + return; + + ctrl = mfspr(SPRN_CTRLF); + + /* Enable DEC and EE interrupt request */ + thread_switch_control = mfspr(SPRN_TSC_CELL); + thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST; + + switch (ctrl & CTRL_CT) { + case CTRL_CT0: + thread_switch_control |= TSC_CELL_DEC_ENABLE_0; + break; + case CTRL_CT1: + thread_switch_control |= TSC_CELL_DEC_ENABLE_1; + break; + default: + printk(KERN_WARNING "%s: unknown configuration\n", + __func__); + break; + } + mtspr(SPRN_TSC_CELL, thread_switch_control); + + /* + * go into low thread priority, medium priority will be + * restored for us after wake-up. + */ + HMT_low(); + + /* + * atomically disable thread execution and runlatch. + * External and Decrementer exceptions are still handled when the + * thread is disabled but now enter in cbe_system_reset_exception() + */ + ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); + mtspr(SPRN_CTRLT, ctrl); + + /* Re-enable interrupts in MSR */ + __hard_irq_enable(); +} + +static int cbe_system_reset_exception(struct pt_regs *regs) +{ + switch (regs->msr & SRR1_WAKEMASK) { + case SRR1_WAKEDEC: + set_dec(1); + break; + case SRR1_WAKEEE: + /* + * Handle these when interrupts get re-enabled and we take + * them as regular exceptions. We are in an NMI context + * and can't handle these here. + */ + break; + case SRR1_WAKEMT: + return cbe_sysreset_hack(); +#ifdef CONFIG_CBE_RAS + case SRR1_WAKESYSERR: + cbe_system_error_exception(regs); + break; + case SRR1_WAKETHERM: + cbe_thermal_exception(regs); + break; +#endif /* CONFIG_CBE_RAS */ + default: + /* do system reset */ + return 0; + } + /* everything handled */ + return 1; +} + +void __init cbe_pervasive_init(void) +{ + int cpu; + + if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) + return; + + for_each_possible_cpu(cpu) { + struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu); + if (!regs) + continue; + + /* Enable Pause(0) control bit */ + out_be64(®s->pmcr, in_be64(®s->pmcr) | + CBE_PMD_PAUSE_ZERO_CONTROL); + } + + ppc_md.power_save = cbe_power_save; + ppc_md.system_reset_exception = cbe_system_reset_exception; +} diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h new file mode 100644 index 0000000000..0da74ab107 --- /dev/null +++ b/arch/powerpc/platforms/cell/pervasive.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Cell Pervasive Monitor and Debug interface and HW structures + * + * (C) Copyright IBM Corporation 2005 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * David J. Erb (djerb@us.ibm.com) + */ + + +#ifndef PERVASIVE_H +#define PERVASIVE_H + +extern void cbe_pervasive_init(void); + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +extern int cbe_sysreset_hack(void); +#else +static inline int cbe_sysreset_hack(void) +{ + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +#endif diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c new file mode 100644 index 0000000000..b207a7f99b --- /dev/null +++ b/arch/powerpc/platforms/cell/pmu.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Cell Broadband Engine Performance Monitor + * + * (C) Copyright IBM Corporation 2001,2006 + * + * Author: + * David Erb (djerb@us.ibm.com) + * Kevin Corry (kevcorry@us.ibm.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "interrupt.h" + +/* + * When writing to write-only mmio addresses, save a shadow copy. All of the + * registers are 32-bit, but stored in the upper-half of a 64-bit field in + * pmd_regs. + */ + +#define WRITE_WO_MMIO(reg, x) \ + do { \ + u32 _x = (x); \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \ + shadow_regs->reg = _x; \ + } while (0) + +#define READ_SHADOW_REG(val, reg) \ + do { \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + (val) = shadow_regs->reg; \ + } while (0) + +#define READ_MMIO_UPPER32(val, reg) \ + do { \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \ + } while (0) + +/* + * Physical counter registers. + * Each physical counter can act as one 32-bit counter or two 16-bit counters. + */ + +u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr) +{ + u32 val_in_latch, val = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + READ_SHADOW_REG(val_in_latch, counter_value_in_latch); + + /* Read the latch or the actual counter, whichever is newer. */ + if (val_in_latch & (1 << phys_ctr)) { + READ_SHADOW_REG(val, pm_ctr[phys_ctr]); + } else { + READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]); + } + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_phys_ctr); + +void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + /* Writing to a counter only writes to a hardware latch. + * The new value is not propagated to the actual counter + * until the performance monitor is enabled. + */ + WRITE_WO_MMIO(pm_ctr[phys_ctr], val); + + pm_ctrl = cbe_read_pm(cpu, pm_control); + if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) { + /* The counters are already active, so we need to + * rewrite the pm_control register to "re-enable" + * the PMU. + */ + cbe_write_pm(cpu, pm_control, pm_ctrl); + } else { + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch |= (1 << phys_ctr); + } + } +} +EXPORT_SYMBOL_GPL(cbe_write_phys_ctr); + +/* + * "Logical" counter registers. + * These will read/write 16-bits or 32-bits depending on the + * current size of the counter. Counters 4 - 7 are always 16-bit. + */ + +u32 cbe_read_ctr(u32 cpu, u32 ctr) +{ + u32 val; + u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) + val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff); + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_ctr); + +void cbe_write_ctr(u32 cpu, u32 ctr, u32 val) +{ + u32 phys_ctr; + u32 phys_val; + + phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) { + phys_val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (ctr < NR_PHYS_CTRS) + val = (val << 16) | (phys_val & 0xffff); + else + val = (val & 0xffff) | (phys_val & 0xffff0000); + } + + cbe_write_phys_ctr(cpu, phys_ctr, val); +} +EXPORT_SYMBOL_GPL(cbe_write_ctr); + +/* + * Counter-control registers. + * Each "logical" counter has a corresponding control register. + */ + +u32 cbe_read_pm07_control(u32 cpu, u32 ctr) +{ + u32 pm07_control = 0; + + if (ctr < NR_CTRS) + READ_SHADOW_REG(pm07_control, pm07_control[ctr]); + + return pm07_control; +} +EXPORT_SYMBOL_GPL(cbe_read_pm07_control); + +void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val) +{ + if (ctr < NR_CTRS) + WRITE_WO_MMIO(pm07_control[ctr], val); +} +EXPORT_SYMBOL_GPL(cbe_write_pm07_control); + +/* + * Other PMU control registers. Most of these are write-only. + */ + +u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) +{ + u32 val = 0; + + switch (reg) { + case group_control: + READ_SHADOW_REG(val, group_control); + break; + + case debug_bus_control: + READ_SHADOW_REG(val, debug_bus_control); + break; + + case trace_address: + READ_MMIO_UPPER32(val, trace_address); + break; + + case ext_tr_timer: + READ_SHADOW_REG(val, ext_tr_timer); + break; + + case pm_status: + READ_MMIO_UPPER32(val, pm_status); + break; + + case pm_control: + READ_SHADOW_REG(val, pm_control); + break; + + case pm_interval: + READ_MMIO_UPPER32(val, pm_interval); + break; + + case pm_start_stop: + READ_SHADOW_REG(val, pm_start_stop); + break; + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_pm); + +void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val) +{ + switch (reg) { + case group_control: + WRITE_WO_MMIO(group_control, val); + break; + + case debug_bus_control: + WRITE_WO_MMIO(debug_bus_control, val); + break; + + case trace_address: + WRITE_WO_MMIO(trace_address, val); + break; + + case ext_tr_timer: + WRITE_WO_MMIO(ext_tr_timer, val); + break; + + case pm_status: + WRITE_WO_MMIO(pm_status, val); + break; + + case pm_control: + WRITE_WO_MMIO(pm_control, val); + break; + + case pm_interval: + WRITE_WO_MMIO(pm_interval, val); + break; + + case pm_start_stop: + WRITE_WO_MMIO(pm_start_stop, val); + break; + } +} +EXPORT_SYMBOL_GPL(cbe_write_pm); + +/* + * Get/set the size of a physical counter to either 16 or 32 bits. + */ + +u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr) +{ + u32 pm_ctrl, size = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32; + } + + return size; +} +EXPORT_SYMBOL_GPL(cbe_get_ctr_size); + +void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size) +{ + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + switch (ctr_size) { + case 16: + pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr); + break; + + case 32: + pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr); + break; + } + cbe_write_pm(cpu, pm_control, pm_ctrl); + } +} +EXPORT_SYMBOL_GPL(cbe_set_ctr_size); + +/* + * Enable/disable the entire performance monitoring unit. + * When we enable the PMU, all pending writes to counters get committed. + */ + +void cbe_enable_pm(u32 cpu) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch = 0; + + pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm); + +void cbe_disable_pm(u32 cpu) +{ + u32 pm_ctrl; + pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm); + +/* + * Reading from the trace_buffer. + * The trace buffer is two 64-bit registers. Reading from + * the second half automatically increments the trace_address. + */ + +void cbe_read_trace_buffer(u32 cpu, u64 *buf) +{ + struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + *buf++ = in_be64(&pmd_regs->trace_buffer_0_63); + *buf++ = in_be64(&pmd_regs->trace_buffer_64_127); +} +EXPORT_SYMBOL_GPL(cbe_read_trace_buffer); + +/* + * Enabling/disabling interrupts for the entire performance monitoring unit. + */ + +u32 cbe_get_and_clear_pm_interrupts(u32 cpu) +{ + /* Reading pm_status clears the interrupt bits. */ + return cbe_read_pm(cpu, pm_status); +} +EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts); + +void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) +{ + /* Set which node and thread will handle the next interrupt. */ + iic_set_interrupt_routing(cpu, thread, 0); + + /* Enable the interrupt bits in the pm_status register. */ + if (mask) + cbe_write_pm(cpu, pm_status, mask); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts); + +void cbe_disable_pm_interrupts(u32 cpu) +{ + cbe_get_and_clear_pm_interrupts(cpu); + cbe_write_pm(cpu, pm_status, 0); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts); + +static irqreturn_t cbe_pm_irq(int irq, void *dev_id) +{ + perf_irq(get_irq_regs()); + return IRQ_HANDLED; +} + +static int __init cbe_init_pm_irq(void) +{ + unsigned int irq; + int rc, node; + + for_each_online_node(node) { + irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | + (node << IIC_IRQ_NODE_SHIFT)); + if (!irq) { + printk("ERROR: Unable to allocate irq for node %d\n", + node); + return -EINVAL; + } + + rc = request_irq(irq, cbe_pm_irq, + 0, "cbe-pmu-0", NULL); + if (rc) { + printk("ERROR: Request for irq on node %d failed\n", + node); + return rc; + } + } + + return 0; +} +machine_arch_initcall(cell, cbe_init_pm_irq); + +void cbe_sync_irq(int node) +{ + unsigned int irq; + + irq = irq_find_mapping(NULL, + IIC_IRQ_IOEX_PMI + | (node << IIC_IRQ_NODE_SHIFT)); + + if (!irq) { + printk(KERN_WARNING "ERROR, unable to get existing irq %d " \ + "for node %d\n", irq, node); + return; + } + + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(cbe_sync_irq); + diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c new file mode 100644 index 0000000000..f6b8792653 --- /dev/null +++ b/arch/powerpc/platforms/cell/ras.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2006-2008, IBM Corporation. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ras.h" +#include "pervasive.h" + +static void dump_fir(int cpu) +{ + struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu); + struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu); + + if (pregs == NULL) + return; + + /* Todo: do some nicer parsing of bits and based on them go down + * to other sub-units FIRs and not only IIC + */ + printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n", + in_be64(&pregs->checkstop_fir)); + printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n", + in_be64(&pregs->checkstop_fir)); + printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n", + in_be64(&pregs->spec_att_mchk_fir)); + + if (iregs == NULL) + return; + printk(KERN_ERR "IOC FIR : 0x%016llx\n", + in_be64(&iregs->ioc_fir)); + +} + +DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception) +{ + int cpu = smp_processor_id(); + + printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu); + dump_fir(cpu); + dump_stack(); +} + +DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception) +{ + int cpu = smp_processor_id(); + + /* + * Nothing implemented for the maintenance interrupt at this point + */ + + printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n", cpu); + dump_stack(); +} + +DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception) +{ + int cpu = smp_processor_id(); + + /* + * Nothing implemented for the thermal interrupt at this point + */ + + printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n", cpu); + dump_stack(); +} + +static int cbe_machine_check_handler(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", cpu); + dump_fir(cpu); + + /* No recovery from this code now, lets continue */ + return 0; +} + +struct ptcal_area { + struct list_head list; + int nid; + int order; + struct page *pages; +}; + +static LIST_HEAD(ptcal_list); + +static int ptcal_start_tok, ptcal_stop_tok; + +static int __init cbe_ptcal_enable_on_node(int nid, int order) +{ + struct ptcal_area *area; + int ret = -ENOMEM; + unsigned long addr; + + if (is_kdump_kernel()) + rtas_call(ptcal_stop_tok, 1, 1, NULL, nid); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (!area) + goto out_err; + + area->nid = nid; + area->order = order; + area->pages = __alloc_pages_node(area->nid, + GFP_KERNEL|__GFP_THISNODE, + area->order); + + if (!area->pages) { + printk(KERN_WARNING "%s: no page on node %d\n", + __func__, area->nid); + goto out_free_area; + } + + /* + * We move the ptcal area to the middle of the allocated + * page, in order to avoid prefetches in memcpy and similar + * functions stepping on it. + */ + addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1); + printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n", + __func__, area->nid, addr); + + ret = -EIO; + if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid, + (unsigned int)(addr >> 32), + (unsigned int)(addr & 0xffffffff))) { + printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n", + __func__, nid); + goto out_free_pages; + } + + list_add(&area->list, &ptcal_list); + + return 0; + +out_free_pages: + __free_pages(area->pages, area->order); +out_free_area: + kfree(area); +out_err: + return ret; +} + +static int __init cbe_ptcal_enable(void) +{ + const u32 *size; + struct device_node *np; + int order, found_mic = 0; + + np = of_find_node_by_path("/rtas"); + if (!np) + return -ENODEV; + + size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); + if (!size) { + of_node_put(np); + return -ENODEV; + } + + pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); + order = get_order(*size); + of_node_put(np); + + /* support for malta device trees, with be@/mic@ nodes */ + for_each_node_by_type(np, "mic-tm") { + cbe_ptcal_enable_on_node(of_node_to_nid(np), order); + found_mic = 1; + } + + if (found_mic) + return 0; + + /* support for older device tree - use cpu nodes */ + for_each_node_by_type(np, "cpu") { + const u32 *nid = of_get_property(np, "node-id", NULL); + if (!nid) { + printk(KERN_ERR "%s: node %pOF is missing node-id?\n", + __func__, np); + continue; + } + cbe_ptcal_enable_on_node(*nid, order); + found_mic = 1; + } + + return found_mic ? 0 : -ENODEV; +} + +static int cbe_ptcal_disable(void) +{ + struct ptcal_area *area, *tmp; + int ret = 0; + + pr_debug("%s: disabling PTCAL\n", __func__); + + list_for_each_entry_safe(area, tmp, &ptcal_list, list) { + /* disable ptcal on this node */ + if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) { + printk(KERN_ERR "%s: error disabling PTCAL " + "on node %d!\n", __func__, + area->nid); + ret = -EIO; + continue; + } + + /* ensure we can access the PTCAL area */ + memset(page_address(area->pages), 0, + 1 << (area->order + PAGE_SHIFT)); + + /* clean up */ + list_del(&area->list); + __free_pages(area->pages, area->order); + kfree(area); + } + + return ret; +} + +static int cbe_ptcal_notify_reboot(struct notifier_block *nb, + unsigned long code, void *data) +{ + return cbe_ptcal_disable(); +} + +static void cbe_ptcal_crash_shutdown(void) +{ + cbe_ptcal_disable(); +} + +static struct notifier_block cbe_ptcal_reboot_notifier = { + .notifier_call = cbe_ptcal_notify_reboot +}; + +#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON +static int sysreset_hack; + +static int __init cbe_sysreset_init(void) +{ + struct cbe_pmd_regs __iomem *regs; + + sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0"); + if (!sysreset_hack) + return 0; + + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + + /* Enable JTAG system-reset hack */ + out_be32(®s->fir_mode_reg, + in_be32(®s->fir_mode_reg) | + CBE_PMD_FIR_MODE_M8); + + return 0; +} +device_initcall(cbe_sysreset_init); + +int cbe_sysreset_hack(void) +{ + struct cbe_pmd_regs __iomem *regs; + + /* + * The BMC can inject user triggered system reset exceptions, + * but cannot set the system reset reason in srr1, + * so check an extra register here. + */ + if (sysreset_hack && (smp_processor_id() == 0)) { + regs = cbe_get_cpu_pmd_regs(0); + if (!regs) + return 0; + if (in_be64(®s->ras_esc_0) & 0x0000ffff) { + out_be64(®s->ras_esc_0, 0); + return 0; + } + } + return 1; +} +#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ + +static int __init cbe_ptcal_init(void) +{ + int ret; + ptcal_start_tok = rtas_function_token(RTAS_FN_IBM_CBE_START_PTCAL); + ptcal_stop_tok = rtas_function_token(RTAS_FN_IBM_CBE_STOP_PTCAL); + + if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE + || ptcal_stop_tok == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + ret = register_reboot_notifier(&cbe_ptcal_reboot_notifier); + if (ret) + goto out1; + + ret = crash_shutdown_register(&cbe_ptcal_crash_shutdown); + if (ret) + goto out2; + + return cbe_ptcal_enable(); + +out2: + unregister_reboot_notifier(&cbe_ptcal_reboot_notifier); +out1: + printk(KERN_ERR "Can't disable PTCAL, so not enabling\n"); + return ret; +} + +arch_initcall(cbe_ptcal_init); + +void __init cbe_ras_init(void) +{ + unsigned long hid0; + + /* + * Enable System Error & thermal interrupts and wakeup conditions + */ + + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP | + HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP; + mtspr(SPRN_HID0, hid0); + mb(); + + /* + * Install machine check handler. Leave setting of precise mode to + * what the firmware did for now + */ + ppc_md.machine_check_exception = cbe_machine_check_handler; + mb(); + + /* + * For now, we assume that IOC_FIR is already set to forward some + * error conditions to the System Error handler. If that is not true + * then it will have to be fixed up here. + */ +} diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h new file mode 100644 index 0000000000..226dbd48ef --- /dev/null +++ b/arch/powerpc/platforms/cell/ras.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef RAS_H +#define RAS_H + +#include + +DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception); +DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception); +DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception); + +extern void cbe_ras_init(void); + +#endif /* RAS_H */ diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c new file mode 100644 index 0000000000..f64a1ef98a --- /dev/null +++ b/arch/powerpc/platforms/cell/setup.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * linux/arch/powerpc/platforms/cell/cell_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by Cell Team, IBM Deutschland Entwicklung GmbH + */ +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cell.h" +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static void cell_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); +} + +static void cell_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev) +{ + struct pci_controller *hose; + const char *s; + int i; + + if (!machine_is(cell)) + return; + + /* We're searching for a direct child of the PHB */ + if (dev->bus->self != NULL || dev->devfn != 0) + return; + + hose = pci_bus_to_host(dev->bus); + if (hose == NULL) + return; + + /* Only on PCIE */ + if (!of_device_is_compatible(hose->dn, "pciex")) + return; + + /* And only on axon */ + s = of_get_property(hose->dn, "model", NULL); + if (!s || strcmp(s, "Axon") != 0) + return; + + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + } + + printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n", + pci_name(dev)); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); + +static int cell_setup_phb(struct pci_controller *phb) +{ + const char *model; + struct device_node *np; + + int rc = rtas_setup_phb(phb); + if (rc) + return rc; + + phb->controller_ops = cell_pci_controller_ops; + + np = phb->dn; + model = of_get_property(np, "model", NULL); + if (model == NULL || !of_node_name_eq(np, "pci")) + return 0; + + /* Setup workarounds for spider */ + if (strcmp(model, "Spider")) + return 0; + + iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, + (void *)SPIDER_PCI_REG_BASE); + return 0; +} + +static const struct of_device_id cell_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "soc", }, + { .type = "spider", }, + { .type = "axon", }, + { .type = "plb5", }, + { .type = "plb4", }, + { .type = "opb", }, + { .type = "ebc", }, + {}, +}; + +static int __init cell_publish_devices(void) +{ + struct device_node *root = of_find_node_by_path("/"); + struct device_node *np; + int node; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, cell_bus_ids, NULL); + + /* On spider based blades, we need to manually create the OF + * platform devices for the PCI host bridges + */ + for_each_child_of_node(root, np) { + if (!of_node_is_type(np, "pci") && !of_node_is_type(np, "pciex")) + continue; + of_platform_device_create(np, NULL, NULL); + } + + of_node_put(root); + + /* There is no device for the MIC memory controller, thus we create + * a platform device for it to attach the EDAC driver to. + */ + for_each_online_node(node) { + if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL) + continue; + platform_device_register_simple("cbe-mic", node, NULL, 0); + } + + return 0; +} +machine_subsys_initcall(cell, cell_publish_devices); + +static void __init mpic_init_IRQ(void) +{ + struct device_node *dn; + struct mpic *mpic; + + for_each_node_by_name(dn, "interrupt-controller") { + if (!of_device_is_compatible(dn, "CBEA,platform-open-pic")) + continue; + + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + mpic = mpic_alloc(dn, 0, MPIC_SECONDARY | MPIC_NO_RESET, + 0, 0, " MPIC "); + if (mpic == NULL) + continue; + mpic_init(mpic); + } +} + + +static void __init cell_init_irq(void) +{ + iic_init_IRQ(); + spider_init_IRQ(); + mpic_init_IRQ(); +} + +static void __init cell_set_dabrx(void) +{ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); +} + +static void __init cell_setup_arch(void) +{ +#ifdef CONFIG_SPU_BASE + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; +#endif + + cbe_regs_init(); + + cell_set_dabrx(); + +#ifdef CONFIG_CBE_RAS + cbe_ras_init(); +#endif + +#ifdef CONFIG_SMP + smp_init_cell(); +#endif + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + + cbe_pervasive_init(); + + mmio_nvram_init(); +} + +static int __init cell_probe(void) +{ + if (!of_machine_is_compatible("IBM,CBEA") && + !of_machine_is_compatible("IBM,CPBW-1.0")) + return 0; + + pm_power_off = rtas_power_off; + + return 1; +} + +define_machine(cell) { + .name = "Cell", + .probe = cell_probe, + .setup_arch = cell_setup_arch, + .show_cpuinfo = cell_show_cpuinfo, + .restart = rtas_restart, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .progress = cell_progress, + .init_IRQ = cell_init_irq, + .pci_setup_phb = cell_setup_phb, +}; + +struct pci_controller_ops cell_pci_controller_ops; diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c new file mode 100644 index 0000000000..30394c6f88 --- /dev/null +++ b/arch/powerpc/platforms/cell/smp.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SMP support for BPA machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "interrupt.h" +#include + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +/* + * The Primary thread of each non-boot processor was started from the OF client + * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. + */ +static cpumask_t of_spin_map; + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); + unsigned int pcpu; + int start_cpu; + + if (cpumask_test_cpu(lcpu, &of_spin_map)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_function_token(RTAS_FN_START_CPU); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, lcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + + return 1; +} + +static void smp_cell_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + iic_setup_cpu(); + + /* + * change default DABRX to allow user watchpoints + */ + mtspr(SPRN_DABRX, DABRX_KERNEL | DABRX_USER); +} + +static int smp_cell_kick_cpu(int nr) +{ + if (nr < 0 || nr >= nr_cpu_ids) + return -EINVAL; + + if (!smp_startup_cpu(nr)) + return -ENOENT; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca_ptrs[nr]->cpu_start = 1; + + return 0; +} + +static struct smp_ops_t bpa_iic_smp_ops = { + .message_pass = iic_message_pass, + .probe = iic_request_IPIs, + .kick_cpu = smp_cell_kick_cpu, + .setup_cpu = smp_cell_setup_cpu, + .cpu_bootable = smp_generic_cpu_bootable, +}; + +/* This is called very early */ +void __init smp_init_cell(void) +{ + int i; + + DBG(" -> smp_init_cell()\n"); + + smp_ops = &bpa_iic_smp_ops; + + /* Mark threads which are still spinning in hold loops. */ + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, &of_spin_map); + } + } else + cpumask_copy(&of_spin_map, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, &of_spin_map); + + /* Non-lpar has additional take/give timebase */ + if (rtas_function_token(RTAS_FN_FREEZE_TIME_BASE) != RTAS_UNKNOWN_SERVICE) { + smp_ops->give_timebase = rtas_give_timebase; + smp_ops->take_timebase = rtas_take_timebase; + } + + DBG(" <- smp_init_cell()\n"); +} diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c new file mode 100644 index 0000000000..68439445b1 --- /dev/null +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IO workarounds for PCI on Celleb/Cell platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + */ + +#undef DEBUG + +#include +#include +#include +#include + +#include +#include +#include + +#define SPIDER_PCI_DISABLE_PREFETCH + +struct spiderpci_iowa_private { + void __iomem *regs; +}; + +static void spiderpci_io_flush(struct iowa_bus *bus) +{ + struct spiderpci_iowa_private *priv; + + priv = bus->private; + in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + iosync(); +} + +#define SPIDER_PCI_MMIO_READ(name, ret) \ +static ret spiderpci_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define SPIDER_PCI_MMIO_READ_STR(name) \ +static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ +} + +SPIDER_PCI_MMIO_READ(readb, u8) +SPIDER_PCI_MMIO_READ(readw, u16) +SPIDER_PCI_MMIO_READ(readl, u32) +SPIDER_PCI_MMIO_READ(readq, u64) +SPIDER_PCI_MMIO_READ(readw_be, u16) +SPIDER_PCI_MMIO_READ(readl_be, u32) +SPIDER_PCI_MMIO_READ(readq_be, u64) +SPIDER_PCI_MMIO_READ_STR(readsb) +SPIDER_PCI_MMIO_READ_STR(readsw) +SPIDER_PCI_MMIO_READ_STR(readsl) + +static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spiderpci_io_flush(iowa_mem_find_bus(src)); +} + +static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, + void __iomem *regs) +{ + void *dummy_page_va; + dma_addr_t dummy_page_da; + +#ifdef SPIDER_PCI_DISABLE_PREFETCH + u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val); + out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif /* SPIDER_PCI_DISABLE_PREFETCH */ + + /* setup dummy read */ + /* + * On CellBlade, we can't know that which XDR memory is used by + * kmalloc() to allocate dummy_page_va. + * In order to improve the performance, the XDR which is used to + * allocate dummy_page_va is the nearest the spider-pci. + * We have to select the CBE which is the nearest the spider-pci + * to allocate memory from the best XDR, but I don't know that + * how to do. + * + * Celleb does not have this problem, because it has only one XDR. + */ + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n"); + return -1; + } + + dummy_page_da = dma_map_single(phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(phb->parent, dummy_page_da)) { + pr_err("SPIDER-IOWA:Map dummy page filed.\n"); + kfree(dummy_page_va); + return -1; + } + + out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da); + + return 0; +} + +int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) +{ + void __iomem *regs = NULL; + struct spiderpci_iowa_private *priv; + struct device_node *np = bus->phb->dn; + struct resource r; + unsigned long offset = (unsigned long)data; + + pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n", + np); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + pr_err("SPIDERPCI-IOWA:" + "Can't allocate struct spiderpci_iowa_private"); + return -1; + } + + if (of_address_to_resource(np, 0, &r)) { + pr_err("SPIDERPCI-IOWA:Can't get resource.\n"); + goto error; + } + + regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE); + if (!regs) { + pr_err("SPIDERPCI-IOWA:ioremap failed.\n"); + goto error; + } + priv->regs = regs; + bus->private = priv; + + if (spiderpci_pci_setup_chip(bus->phb, regs)) + goto error; + + return 0; + +error: + kfree(priv); + bus->private = NULL; + + if (regs) + iounmap(regs); + + return -1; +} + +struct ppc_pci_io spiderpci_ops = { + .readb = spiderpci_readb, + .readw = spiderpci_readw, + .readl = spiderpci_readl, + .readq = spiderpci_readq, + .readw_be = spiderpci_readw_be, + .readl_be = spiderpci_readl_be, + .readq_be = spiderpci_readq_be, + .readsb = spiderpci_readsb, + .readsw = spiderpci_readsw, + .readsl = spiderpci_readsl, + .memcpy_fromio = spiderpci_memcpy_fromio, +}; + diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c new file mode 100644 index 0000000000..11df737c8c --- /dev/null +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * External Interrupt Controller on Spider South Bridge + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "interrupt.h" + +/* register layout taken from Spider spec, table 7.4-4 */ +enum { + TIR_DEN = 0x004, /* Detection Enable Register */ + TIR_MSK = 0x084, /* Mask Level Register */ + TIR_EDC = 0x0c0, /* Edge Detection Clear Register */ + TIR_PNDA = 0x100, /* Pending Register A */ + TIR_PNDB = 0x104, /* Pending Register B */ + TIR_CS = 0x144, /* Current Status Register */ + TIR_LCSA = 0x150, /* Level Current Status Register A */ + TIR_LCSB = 0x154, /* Level Current Status Register B */ + TIR_LCSC = 0x158, /* Level Current Status Register C */ + TIR_LCSD = 0x15c, /* Level Current Status Register D */ + TIR_CFGA = 0x200, /* Setting Register A0 */ + TIR_CFGB = 0x204, /* Setting Register B0 */ + /* 0x208 ... 0x3ff Setting Register An/Bn */ + TIR_PPNDA = 0x400, /* Packet Pending Register A */ + TIR_PPNDB = 0x404, /* Packet Pending Register B */ + TIR_PIERA = 0x408, /* Packet Output Error Register A */ + TIR_PIERB = 0x40c, /* Packet Output Error Register B */ + TIR_PIEN = 0x444, /* Packet Output Enable Register */ + TIR_PIPND = 0x454, /* Packet Output Pending Register */ + TIRDID = 0x484, /* Spider Device ID Register */ + REISTIM = 0x500, /* Reissue Command Timeout Time Setting */ + REISTIMEN = 0x504, /* Reissue Command Timeout Setting */ + REISWAITEN = 0x508, /* Reissue Wait Control*/ +}; + +#define SPIDER_CHIP_COUNT 4 +#define SPIDER_SRC_COUNT 64 +#define SPIDER_IRQ_INVALID 63 + +struct spider_pic { + struct irq_domain *host; + void __iomem *regs; + unsigned int node_id; +}; +static struct spider_pic spider_pics[SPIDER_CHIP_COUNT]; + +static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d) +{ + return irq_data_get_irq_chip_data(d); +} + +static void __iomem *spider_get_irq_config(struct spider_pic *pic, + unsigned int src) +{ + return pic->regs + TIR_CFGA + 8 * src; +} + +static void spider_unmask_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) | 0x30000000u); +} + +static void spider_mask_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + void __iomem *cfg = spider_get_irq_config(pic, irqd_to_hwirq(d)); + + out_be32(cfg, in_be32(cfg) & ~0x30000000u); +} + +static void spider_ack_irq(struct irq_data *d) +{ + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int src = irqd_to_hwirq(d); + + /* Reset edge detection logic if necessary + */ + if (irqd_is_level_type(d)) + return; + + /* Only interrupts 47 to 50 can be set to edge */ + if (src < 47 || src > 50) + return; + + /* Perform the clear of the edge logic */ + out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf)); +} + +static int spider_set_irq_type(struct irq_data *d, unsigned int type) +{ + unsigned int sense = type & IRQ_TYPE_SENSE_MASK; + struct spider_pic *pic = spider_irq_data_to_pic(d); + unsigned int hw = irqd_to_hwirq(d); + void __iomem *cfg = spider_get_irq_config(pic, hw); + u32 old_mask; + u32 ic; + + /* Note that only level high is supported for most interrupts */ + if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH && + (hw < 47 || hw > 50)) + return -EINVAL; + + /* Decode sense type */ + switch(sense) { + case IRQ_TYPE_EDGE_RISING: + ic = 0x3; + break; + case IRQ_TYPE_EDGE_FALLING: + ic = 0x2; + break; + case IRQ_TYPE_LEVEL_LOW: + ic = 0x0; + break; + case IRQ_TYPE_LEVEL_HIGH: + case IRQ_TYPE_NONE: + ic = 0x1; + break; + default: + return -EINVAL; + } + + /* Configure the source. One gross hack that was there before and + * that I've kept around is the priority to the BE which I set to + * be the same as the interrupt source number. I don't know whether + * that's supposed to make any kind of sense however, we'll have to + * decide that, but for now, I'm not changing the behaviour. + */ + old_mask = in_be32(cfg) & 0x30000000u; + out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) | + (pic->node_id << 4) | 0xe); + out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff)); + + return 0; +} + +static struct irq_chip spider_pic = { + .name = "SPIDER", + .irq_unmask = spider_unmask_irq, + .irq_mask = spider_mask_irq, + .irq_ack = spider_ack_irq, + .irq_set_type = spider_set_irq_type, +}; + +static int spider_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq); + + /* Set default irq type */ + irq_set_irq_type(virq, IRQ_TYPE_NONE); + + return 0; +} + +static int spider_host_xlate(struct irq_domain *h, struct device_node *ct, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) + +{ + /* Spider interrupts have 2 cells, first is the interrupt source, + * second, well, I don't know for sure yet ... We mask the top bits + * because old device-trees encode a node number in there + */ + *out_hwirq = intspec[0] & 0x3f; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + return 0; +} + +static const struct irq_domain_ops spider_host_ops = { + .map = spider_host_map, + .xlate = spider_host_xlate, +}; + +static void spider_irq_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct spider_pic *pic = irq_desc_get_handler_data(desc); + unsigned int cs; + + cs = in_be32(pic->regs + TIR_CS) >> 24; + if (cs != SPIDER_IRQ_INVALID) + generic_handle_domain_irq(pic->host, cs); + + chip->irq_eoi(&desc->irq_data); +} + +/* For hooking up the cascade we have a problem. Our device-tree is + * crap and we don't know on which BE iic interrupt we are hooked on at + * least not the "standard" way. We can reconstitute it based on two + * informations though: which BE node we are connected to and whether + * we are connected to IOIF0 or IOIF1. Right now, we really only care + * about the IBM cell blade and we know that its firmware gives us an + * interrupt-map property which is pretty strange. + */ +static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) +{ + unsigned int virq; + const u32 *imap, *tmp; + int imaplen, intsize, unit; + struct device_node *iic; + struct device_node *of_node; + + of_node = irq_domain_get_of_node(pic->host); + + /* First, we check whether we have a real "interrupts" in the device + * tree in case the device-tree is ever fixed + */ + virq = irq_of_parse_and_map(of_node, 0); + if (virq) + return virq; + + /* Now do the horrible hacks */ + tmp = of_get_property(of_node, "#interrupt-cells", NULL); + if (tmp == NULL) + return 0; + intsize = *tmp; + imap = of_get_property(of_node, "interrupt-map", &imaplen); + if (imap == NULL || imaplen < (intsize + 1)) + return 0; + iic = of_find_node_by_phandle(imap[intsize]); + if (iic == NULL) + return 0; + imap += intsize + 1; + tmp = of_get_property(iic, "#interrupt-cells", NULL); + if (tmp == NULL) { + of_node_put(iic); + return 0; + } + intsize = *tmp; + /* Assume unit is last entry of interrupt specifier */ + unit = imap[intsize - 1]; + /* Ok, we have a unit, now let's try to get the node */ + tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL); + if (tmp == NULL) { + of_node_put(iic); + return 0; + } + /* ugly as hell but works for now */ + pic->node_id = (*tmp) >> 1; + of_node_put(iic); + + /* Ok, now let's get cracking. You may ask me why I just didn't match + * the iic host from the iic OF node, but that way I'm still compatible + * with really really old old firmwares for which we don't have a node + */ + /* Manufacture an IIC interrupt number of class 2 */ + virq = irq_create_mapping(NULL, + (pic->node_id << IIC_IRQ_NODE_SHIFT) | + (2 << IIC_IRQ_CLASS_SHIFT) | + unit); + if (!virq) + printk(KERN_ERR "spider_pic: failed to map cascade !"); + return virq; +} + + +static void __init spider_init_one(struct device_node *of_node, int chip, + unsigned long addr) +{ + struct spider_pic *pic = &spider_pics[chip]; + int i, virq; + + /* Map registers */ + pic->regs = ioremap(addr, 0x1000); + if (pic->regs == NULL) + panic("spider_pic: can't map registers !"); + + /* Allocate a host */ + pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT, + &spider_host_ops, pic); + if (pic->host == NULL) + panic("spider_pic: can't allocate irq host !"); + + /* Go through all sources and disable them */ + for (i = 0; i < SPIDER_SRC_COUNT; i++) { + void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i; + out_be32(cfg, in_be32(cfg) & ~0x30000000u); + } + + /* do not mask any interrupts because of level */ + out_be32(pic->regs + TIR_MSK, 0x0); + + /* enable interrupt packets to be output */ + out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1); + + /* Hook up the cascade interrupt to the iic and nodeid */ + virq = spider_find_cascade_and_node(pic); + if (!virq) + return; + irq_set_handler_data(virq, pic); + irq_set_chained_handler(virq, spider_irq_cascade); + + printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %pOF\n", + pic->node_id, addr, of_node); + + /* Enable the interrupt detection enable bit. Do this last! */ + out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1); +} + +void __init spider_init_IRQ(void) +{ + struct resource r; + struct device_node *dn; + int chip = 0; + + /* XXX node numbers are totally bogus. We _hope_ we get the device + * nodes in the right order here but that's definitely not guaranteed, + * we need to get the node from the device tree instead. + * There is currently no proper property for it (but our whole + * device-tree is bogus anyway) so all we can do is pray or maybe test + * the address and deduce the node-id + */ + for_each_node_by_name(dn, "interrupt-controller") { + if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) { + if (of_address_to_resource(dn, 0, &r)) { + printk(KERN_WARNING "spider-pic: Failed\n"); + continue; + } + } else if (of_device_is_compatible(dn, "sti,platform-spider-pic") + && (chip < 2)) { + static long hard_coded_pics[] = + { 0x24000008000ul, 0x34000008000ul}; + r.start = hard_coded_pics[chip]; + } else + continue; + spider_init_one(dn, chip++, r.start); + } +} diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c new file mode 100644 index 0000000000..dea6f0f258 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -0,0 +1,790 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct spu_management_ops *spu_management_ops; +EXPORT_SYMBOL_GPL(spu_management_ops); + +const struct spu_priv1_ops *spu_priv1_ops; +EXPORT_SYMBOL_GPL(spu_priv1_ops); + +struct cbe_spu_info cbe_spu_info[MAX_NUMNODES]; +EXPORT_SYMBOL_GPL(cbe_spu_info); + +/* + * The spufs fault-handling code needs to call force_sig_fault to raise signals + * on DMA errors. Export it here to avoid general kernel-wide access to this + * function + */ +EXPORT_SYMBOL_GPL(force_sig_fault); + +/* + * Protects cbe_spu_info and spu->number. + */ +static DEFINE_SPINLOCK(spu_lock); + +/* + * List of all spus in the system. + * + * This list is iterated by callers from irq context and callers that + * want to sleep. Thus modifications need to be done with both + * spu_full_list_lock and spu_full_list_mutex held, while iterating + * through it requires either of these locks. + * + * In addition spu_full_list_lock protects all assignments to + * spu->mm. + */ +static LIST_HEAD(spu_full_list); +static DEFINE_SPINLOCK(spu_full_list_lock); +static DEFINE_MUTEX(spu_full_list_mutex); + +void spu_invalidate_slbs(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + unsigned long flags; + + spin_lock_irqsave(&spu->register_lock, flags); + if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) + out_be64(&priv2->slb_invalidate_all_W, 0UL); + spin_unlock_irqrestore(&spu->register_lock, flags); +} +EXPORT_SYMBOL_GPL(spu_invalidate_slbs); + +/* This is called by the MM core when a segment size is changed, to + * request a flush of all the SPEs using a given mm + */ +void spu_flush_all_slbs(struct mm_struct *mm) +{ + struct spu *spu; + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + list_for_each_entry(spu, &spu_full_list, full_list) { + if (spu->mm == mm) + spu_invalidate_slbs(spu); + } + spin_unlock_irqrestore(&spu_full_list_lock, flags); +} + +/* The hack below stinks... try to do something better one of + * these days... Does it even work properly with NR_CPUS == 1 ? + */ +static inline void mm_needs_global_tlbie(struct mm_struct *mm) +{ + int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1; + + /* Global TLBIE broadcast required with SPEs. */ + bitmap_fill(cpumask_bits(mm_cpumask(mm)), nr); +} + +void spu_associate_mm(struct spu *spu, struct mm_struct *mm) +{ + unsigned long flags; + + spin_lock_irqsave(&spu_full_list_lock, flags); + spu->mm = mm; + spin_unlock_irqrestore(&spu_full_list_lock, flags); + if (mm) + mm_needs_global_tlbie(mm); +} +EXPORT_SYMBOL_GPL(spu_associate_mm); + +int spu_64k_pages_available(void) +{ + return mmu_psize_defs[MMU_PAGE_64K].shift != 0; +} +EXPORT_SYMBOL_GPL(spu_64k_pages_available); + +static void spu_restart_dma(struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); + else { + set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + mb(); + } +} + +static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n", + __func__, slbe, slb->vsid, slb->esid); + + out_be64(&priv2->slb_index_W, slbe); + /* set invalid before writing vsid */ + out_be64(&priv2->slb_esid_RW, 0); + /* now it's safe to write the vsid */ + out_be64(&priv2->slb_vsid_RW, slb->vsid); + /* setting the new esid makes the entry valid again */ + out_be64(&priv2->slb_esid_RW, slb->esid); +} + +static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) +{ + struct copro_slb slb; + int ret; + + ret = copro_calculate_slb(spu->mm, ea, &slb); + if (ret) + return ret; + + spu_load_slb(spu, spu->slb_replace, &slb); + + spu->slb_replace++; + if (spu->slb_replace >= 8) + spu->slb_replace = 0; + + spu_restart_dma(spu); + spu->stats.slb_flt++; + return 0; +} + +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX +static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) +{ + int ret; + + pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea); + + /* + * Handle kernel space hash faults immediately. User hash + * faults need to be deferred to process context. + */ + if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) && + (get_region_id(ea) != USER_REGION_ID)) { + + spin_unlock(&spu->register_lock); + ret = hash_page(ea, + _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED, + 0x300, dsisr); + spin_lock(&spu->register_lock); + + if (!ret) { + spu_restart_dma(spu); + return 0; + } + } + + spu->class_1_dar = ea; + spu->class_1_dsisr = dsisr; + + spu->stop_callback(spu, 1); + + spu->class_1_dar = 0; + spu->class_1_dsisr = 0; + + return 0; +} + +static void __spu_kernel_slb(void *addr, struct copro_slb *slb) +{ + unsigned long ea = (unsigned long)addr; + u64 llp; + + if (get_region_id(ea) == LINEAR_MAP_REGION_ID) + llp = mmu_psize_defs[mmu_linear_psize].sllp; + else + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + + slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + slb->esid = (ea & ESID_MASK) | SLB_ESID_V; +} + +/** + * Given an array of @nr_slbs SLB entries, @slbs, return non-zero if the + * address @new_addr is present. + */ +static inline int __slb_present(struct copro_slb *slbs, int nr_slbs, + void *new_addr) +{ + unsigned long ea = (unsigned long)new_addr; + int i; + + for (i = 0; i < nr_slbs; i++) + if (!((slbs[i].esid ^ ea) & ESID_MASK)) + return 1; + + return 0; +} + +/** + * Setup the SPU kernel SLBs, in preparation for a context save/restore. We + * need to map both the context save area, and the save/restore code. + * + * Because the lscsa and code may cross segment boundaries, we check to see + * if mappings are required for the start and end of each range. We currently + * assume that the mappings are smaller that one segment - if not, something + * is seriously wrong. + */ +void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa, + void *code, int code_size) +{ + struct copro_slb slbs[4]; + int i, nr_slbs = 0; + /* start and end addresses of both mappings */ + void *addrs[] = { + lscsa, (void *)lscsa + sizeof(*lscsa) - 1, + code, code + code_size - 1 + }; + + /* check the set of addresses, and create a new entry in the slbs array + * if there isn't already a SLB for that address */ + for (i = 0; i < ARRAY_SIZE(addrs); i++) { + if (__slb_present(slbs, nr_slbs, addrs[i])) + continue; + + __spu_kernel_slb(addrs[i], &slbs[nr_slbs]); + nr_slbs++; + } + + spin_lock_irq(&spu->register_lock); + /* Add the set of SLBs */ + for (i = 0; i < nr_slbs; i++) + spu_load_slb(spu, i, &slbs[i]); + spin_unlock_irq(&spu->register_lock); +} +EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs); + +static irqreturn_t +spu_irq_class_0(int irq, void *data) +{ + struct spu *spu; + unsigned long stat, mask; + + spu = data; + + spin_lock(&spu->register_lock); + mask = spu_int_mask_get(spu, 0); + stat = spu_int_stat_get(spu, 0) & mask; + + spu->class_0_pending |= stat; + spu->class_0_dar = spu_mfc_dar_get(spu); + spu->stop_callback(spu, 0); + spu->class_0_pending = 0; + spu->class_0_dar = 0; + + spu_int_stat_clear(spu, 0, stat); + spin_unlock(&spu->register_lock); + + return IRQ_HANDLED; +} + +static irqreturn_t +spu_irq_class_1(int irq, void *data) +{ + struct spu *spu; + unsigned long stat, mask, dar, dsisr; + + spu = data; + + /* atomically read & clear class1 status. */ + spin_lock(&spu->register_lock); + mask = spu_int_mask_get(spu, 1); + stat = spu_int_stat_get(spu, 1) & mask; + dar = spu_mfc_dar_get(spu); + dsisr = spu_mfc_dsisr_get(spu); + if (stat & CLASS1_STORAGE_FAULT_INTR) + spu_mfc_dsisr_set(spu, 0ul); + spu_int_stat_clear(spu, 1, stat); + + pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat, + dar, dsisr); + + if (stat & CLASS1_SEGMENT_FAULT_INTR) + __spu_trap_data_seg(spu, dar); + + if (stat & CLASS1_STORAGE_FAULT_INTR) + __spu_trap_data_map(spu, dar, dsisr); + + spu->class_1_dsisr = 0; + spu->class_1_dar = 0; + + spin_unlock(&spu->register_lock); + + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static irqreturn_t +spu_irq_class_2(int irq, void *data) +{ + struct spu *spu; + unsigned long stat; + unsigned long mask; + const int mailbox_intrs = + CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR; + + spu = data; + spin_lock(&spu->register_lock); + stat = spu_int_stat_get(spu, 2); + mask = spu_int_mask_get(spu, 2); + /* ignore interrupts we're not waiting for */ + stat &= mask; + /* mailbox interrupts are level triggered. mask them now before + * acknowledging */ + if (stat & mailbox_intrs) + spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs)); + /* acknowledge all interrupts before the callbacks */ + spu_int_stat_clear(spu, 2, stat); + + pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask); + + if (stat & CLASS2_MAILBOX_INTR) + spu->ibox_callback(spu); + + if (stat & CLASS2_SPU_STOP_INTR) + spu->stop_callback(spu, 2); + + if (stat & CLASS2_SPU_HALT_INTR) + spu->stop_callback(spu, 2); + + if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR) + spu->mfc_callback(spu); + + if (stat & CLASS2_MAILBOX_THRESHOLD_INTR) + spu->wbox_callback(spu); + + spu->stats.class2_intr++; + + spin_unlock(&spu->register_lock); + + return stat ? IRQ_HANDLED : IRQ_NONE; +} + +static int __init spu_request_irqs(struct spu *spu) +{ + int ret = 0; + + if (spu->irqs[0]) { + snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", + spu->number); + ret = request_irq(spu->irqs[0], spu_irq_class_0, + 0, spu->irq_c0, spu); + if (ret) + goto bail0; + } + if (spu->irqs[1]) { + snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", + spu->number); + ret = request_irq(spu->irqs[1], spu_irq_class_1, + 0, spu->irq_c1, spu); + if (ret) + goto bail1; + } + if (spu->irqs[2]) { + snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", + spu->number); + ret = request_irq(spu->irqs[2], spu_irq_class_2, + 0, spu->irq_c2, spu); + if (ret) + goto bail2; + } + return 0; + +bail2: + if (spu->irqs[1]) + free_irq(spu->irqs[1], spu); +bail1: + if (spu->irqs[0]) + free_irq(spu->irqs[0], spu); +bail0: + return ret; +} + +static void spu_free_irqs(struct spu *spu) +{ + if (spu->irqs[0]) + free_irq(spu->irqs[0], spu); + if (spu->irqs[1]) + free_irq(spu->irqs[1], spu); + if (spu->irqs[2]) + free_irq(spu->irqs[2], spu); +} + +void spu_init_channels(struct spu *spu) +{ + static const struct { + unsigned channel; + unsigned count; + } zero_list[] = { + { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, }, + { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, }, + }, count_list[] = { + { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, }, + { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, }, + { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, }, + }; + struct spu_priv2 __iomem *priv2; + int i; + + priv2 = spu->priv2; + + /* initialize all channel data to zero */ + for (i = 0; i < ARRAY_SIZE(zero_list); i++) { + int count; + + out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel); + for (count = 0; count < zero_list[i].count; count++) + out_be64(&priv2->spu_chnldata_RW, 0); + } + + /* initialize channel counts to meaningful values */ + for (i = 0; i < ARRAY_SIZE(count_list); i++) { + out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel); + out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); + } +} +EXPORT_SYMBOL_GPL(spu_init_channels); + +static struct bus_type spu_subsys = { + .name = "spu", + .dev_name = "spu", +}; + +int spu_add_dev_attr(struct device_attribute *attr) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_create_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(spu_add_dev_attr); + +int spu_add_dev_attr_group(const struct attribute_group *attrs) +{ + struct spu *spu; + int rc = 0; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + rc = sysfs_create_group(&spu->dev.kobj, attrs); + + /* we're in trouble here, but try unwinding anyway */ + if (rc) { + printk(KERN_ERR "%s: can't create sysfs group '%s'\n", + __func__, attrs->name); + + list_for_each_entry_continue_reverse(spu, + &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + break; + } + } + + mutex_unlock(&spu_full_list_mutex); + + return rc; +} +EXPORT_SYMBOL_GPL(spu_add_dev_attr_group); + + +void spu_remove_dev_attr(struct device_attribute *attr) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + device_remove_file(&spu->dev, attr); + mutex_unlock(&spu_full_list_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_dev_attr); + +void spu_remove_dev_attr_group(const struct attribute_group *attrs) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) + sysfs_remove_group(&spu->dev.kobj, attrs); + mutex_unlock(&spu_full_list_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); + +static int __init spu_create_dev(struct spu *spu) +{ + int ret; + + spu->dev.id = spu->number; + spu->dev.bus = &spu_subsys; + ret = device_register(&spu->dev); + if (ret) { + printk(KERN_ERR "Can't register SPU %d with sysfs\n", + spu->number); + return ret; + } + + sysfs_add_device_to_node(&spu->dev, spu->node); + + return 0; +} + +static int __init create_spu(void *data) +{ + struct spu *spu; + int ret; + static int number; + unsigned long flags; + + ret = -ENOMEM; + spu = kzalloc(sizeof (*spu), GFP_KERNEL); + if (!spu) + goto out; + + spu->alloc_state = SPU_FREE; + + spin_lock_init(&spu->register_lock); + spin_lock(&spu_lock); + spu->number = number++; + spin_unlock(&spu_lock); + + ret = spu_create_spu(spu, data); + + if (ret) + goto out_free; + + spu_mfc_sdr_setup(spu); + spu_mfc_sr1_set(spu, 0x33); + ret = spu_request_irqs(spu); + if (ret) + goto out_destroy; + + ret = spu_create_dev(spu); + if (ret) + goto out_free_irqs; + + mutex_lock(&cbe_spu_info[spu->node].list_mutex); + list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); + cbe_spu_info[spu->node].n_spus++; + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); + + mutex_lock(&spu_full_list_mutex); + spin_lock_irqsave(&spu_full_list_lock, flags); + list_add(&spu->full_list, &spu_full_list); + spin_unlock_irqrestore(&spu_full_list_lock, flags); + mutex_unlock(&spu_full_list_mutex); + + spu->stats.util_state = SPU_UTIL_IDLE_LOADED; + spu->stats.tstamp = ktime_get_ns(); + + INIT_LIST_HEAD(&spu->aff_list); + + goto out; + +out_free_irqs: + spu_free_irqs(spu); +out_destroy: + spu_destroy_spu(spu); +out_free: + kfree(spu); +out: + return ret; +} + +static const char *spu_state_names[] = { + "user", "system", "iowait", "idle" +}; + +static unsigned long long spu_acct_time(struct spu *spu, + enum spu_utilization_state state) +{ + unsigned long long time = spu->stats.times[state]; + + /* + * If the spu is idle or the context is stopped, utilization + * statistics are not updated. Apply the time delta from the + * last recorded state of the spu. + */ + if (spu->stats.util_state == state) + time += ktime_get_ns() - spu->stats.tstamp; + + return time / NSEC_PER_MSEC; +} + + +static ssize_t spu_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct spu *spu = container_of(dev, struct spu, dev); + + return sprintf(buf, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + spu_state_names[spu->stats.util_state], + spu_acct_time(spu, SPU_UTIL_USER), + spu_acct_time(spu, SPU_UTIL_SYSTEM), + spu_acct_time(spu, SPU_UTIL_IOWAIT), + spu_acct_time(spu, SPU_UTIL_IDLE_LOADED), + spu->stats.vol_ctx_switch, + spu->stats.invol_ctx_switch, + spu->stats.slb_flt, + spu->stats.hash_flt, + spu->stats.min_flt, + spu->stats.maj_flt, + spu->stats.class2_intr, + spu->stats.libassist); +} + +static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); + +#ifdef CONFIG_KEXEC_CORE + +struct crash_spu_info { + struct spu *spu; + u32 saved_spu_runcntl_RW; + u32 saved_spu_status_R; + u32 saved_spu_npc_RW; + u64 saved_mfc_sr1_RW; + u64 saved_mfc_dar; + u64 saved_mfc_dsisr; +}; + +#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ +static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; + +static void crash_kexec_stop_spus(void) +{ + struct spu *spu; + int i; + u64 tmp; + + for (i = 0; i < CRASH_NUM_SPUS; i++) { + if (!crash_spu_info[i].spu) + continue; + + spu = crash_spu_info[i].spu; + + crash_spu_info[i].saved_spu_runcntl_RW = + in_be32(&spu->problem->spu_runcntl_RW); + crash_spu_info[i].saved_spu_status_R = + in_be32(&spu->problem->spu_status_R); + crash_spu_info[i].saved_spu_npc_RW = + in_be32(&spu->problem->spu_npc_RW); + + crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); + crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); + tmp = spu_mfc_sr1_get(spu); + crash_spu_info[i].saved_mfc_sr1_RW = tmp; + + tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, tmp); + + __delay(200); + } +} + +static void __init crash_register_spus(struct list_head *list) +{ + struct spu *spu; + int ret; + + list_for_each_entry(spu, list, full_list) { + if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + continue; + + crash_spu_info[spu->number].spu = spu; + } + + ret = crash_shutdown_register(&crash_kexec_stop_spus); + if (ret) + printk(KERN_ERR "Could not register SPU crash handler"); +} + +#else +static inline void crash_register_spus(struct list_head *list) +{ +} +#endif + +static void spu_shutdown(void) +{ + struct spu *spu; + + mutex_lock(&spu_full_list_mutex); + list_for_each_entry(spu, &spu_full_list, full_list) { + spu_free_irqs(spu); + spu_destroy_spu(spu); + } + mutex_unlock(&spu_full_list_mutex); +} + +static struct syscore_ops spu_syscore_ops = { + .shutdown = spu_shutdown, +}; + +static int __init init_spu_base(void) +{ + int i, ret = 0; + + for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); + } + + if (!spu_management_ops) + goto out; + + /* create system subsystem for spus */ + ret = subsys_system_register(&spu_subsys, NULL); + if (ret) + goto out; + + ret = spu_enumerate_spus(create_spu); + + if (ret < 0) { + printk(KERN_WARNING "%s: Error initializing spus\n", + __func__); + goto out_unregister_subsys; + } + + if (ret > 0) + fb_append_extra_logo(&logo_spe_clut224, ret); + + mutex_lock(&spu_full_list_mutex); + xmon_register_spus(&spu_full_list); + crash_register_spus(&spu_full_list); + mutex_unlock(&spu_full_list_mutex); + spu_add_dev_attr(&dev_attr_stat); + register_syscore_ops(&spu_syscore_ops); + + spu_init_affinity(); + + return 0; + + out_unregister_subsys: + bus_unregister(&spu_subsys); + out: + return ret; +} +device_initcall(init_spu_base); diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c new file mode 100644 index 0000000000..e780c14c57 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * System call callback functions for SPUs + */ + +#undef DEBUG + +#include +#include +#include + +#include +#include +#include + +/* + * This table defines the system calls that an SPU can call. + * It is currently a subset of the 64 bit powerpc system calls, + * with the exact semantics. + * + * The reasons for disabling some of the system calls are: + * 1. They interact with the way SPU syscalls are handled + * and we can't let them execute ever: + * restart_syscall, exit, for, execve, ptrace, ... + * 2. They are deprecated and replaced by other means: + * uselib, pciconfig_*, sysfs, ... + * 3. They are somewhat interacting with the system in a way + * we don't want an SPU to: + * reboot, init_module, mount, kexec_load + * 4. They are optional and we can't rely on them being + * linked into the kernel. Unfortunately, the cond_syscall + * helper does not work here as it does not add the necessary + * opd symbols: + * mbind, mq_open, ipc, ... + */ + +static const syscall_fn spu_syscall_table[] = { +#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) +#define __SYSCALL(nr, entry) [nr] = (void *) entry, +#include +}; + +long spu_sys_callback(struct spu_syscall_block *s) +{ + syscall_fn syscall; + + if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) { + pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret); + return -ENOSYS; + } + + syscall = spu_syscall_table[s->nr_ret]; + + pr_debug("SPU-syscall " + "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n", + syscall, + s->nr_ret, + s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); + + return syscall(s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); +} +EXPORT_SYMBOL_GPL(spu_sys_callback); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c new file mode 100644 index 0000000000..f464a1f2e5 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * spu management operations for of based platforms + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + * (C) Copyright 2007 TOSHIBA CORPORATION + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "spufs/spufs.h" +#include "interrupt.h" +#include "spu_priv1_mmio.h" + +struct device_node *spu_devnode(struct spu *spu) +{ + return spu->devnode; +} + +EXPORT_SYMBOL_GPL(spu_devnode); + +static u64 __init find_spu_unit_number(struct device_node *spe) +{ + const unsigned int *prop; + int proplen; + + /* new device trees should provide the physical-id attribute */ + prop = of_get_property(spe, "physical-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* celleb device tree provides the unit-id */ + prop = of_get_property(spe, "unit-id", &proplen); + if (proplen == 4) + return (u64)*prop; + + /* legacy device trees provide the id in the reg attribute */ + prop = of_get_property(spe, "reg", &proplen); + if (proplen == 4) + return (u64)*prop; + + return 0; +} + +static void spu_unmap(struct spu *spu) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + iounmap(spu->priv1); + iounmap(spu->priv2); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); +} + +static int __init spu_map_interrupts_old(struct spu *spu, + struct device_node *np) +{ + unsigned int isrc; + const u32 *tmp; + int nid; + + /* Get the interrupt source unit from the device-tree */ + tmp = of_get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + isrc = tmp[0]; + + tmp = of_get_property(np->parent->parent, "node-id", NULL); + if (!tmp) { + printk(KERN_WARNING "%s: can't find node-id\n", __func__); + nid = spu->node; + } else + nid = tmp[0]; + + /* Add the node number */ + isrc |= nid << IIC_IRQ_NODE_SHIFT; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); + spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); + spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); + + /* Right now, we only fail if class 2 failed */ + if (!spu->irqs[2]) + return -EINVAL; + + return 0; +} + +static void __iomem * __init spu_map_prop_old(struct spu *spu, + struct device_node *n, + const char *name) +{ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + int proplen; + + prop = of_get_property(n, name, &proplen); + if (prop == NULL || proplen != sizeof (struct address_prop)) + return NULL; + + return ioremap(prop->address, prop->len); +} + +static int __init spu_map_device_old(struct spu *spu) +{ + struct device_node *node = spu->devnode; + const char *prop; + int ret; + + ret = -ENODEV; + spu->name = of_get_property(node, "name", NULL); + if (!spu->name) + goto out; + + prop = of_get_property(node, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *) + spu_map_prop_old(spu, node, "local-store"); + if (!spu->local_store) + goto out; + + prop = of_get_property(node, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + + spu->problem = spu_map_prop_old(spu, node, "problem"); + if (!spu->problem) + goto out_unmap; + + spu->priv2 = spu_map_prop_old(spu, node, "priv2"); + if (!spu->priv2) + goto out_unmap; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + spu->priv1 = spu_map_prop_old(spu, node, "priv1"); + if (!spu->priv1) + goto out_unmap; + } + + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + int i; + + for (i=0; i < 3; i++) { + spu->irqs[i] = irq_of_parse_and_map(np, i); + if (!spu->irqs[i]) + goto err; + } + return 0; + +err: + pr_debug("failed to map irq %x for spu %s\n", i, spu->name); + for (; i >= 0; i--) { + if (spu->irqs[i]) + irq_dispose_mapping(spu->irqs[i]); + } + return -EINVAL; +} + +static int __init spu_map_resource(struct spu *spu, int nr, + void __iomem** virt, unsigned long *phys) +{ + struct device_node *np = spu->devnode; + struct resource resource = { }; + unsigned long len; + int ret; + + ret = of_address_to_resource(np, nr, &resource); + if (ret) + return ret; + if (phys) + *phys = resource.start; + len = resource_size(&resource); + *virt = ioremap(resource.start, len); + if (!*virt) + return -EINVAL; + return 0; +} + +static int __init spu_map_device(struct spu *spu) +{ + struct device_node *np = spu->devnode; + int ret = -ENODEV; + + spu->name = of_get_property(np, "name", NULL); + if (!spu->name) + goto out; + + ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store, + &spu->local_store_phys); + if (ret) { + pr_debug("spu_new: failed to map %pOF resource 0\n", + np); + goto out; + } + ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem, + &spu->problem_phys); + if (ret) { + pr_debug("spu_new: failed to map %pOF resource 1\n", + np); + goto out_unmap; + } + ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL); + if (ret) { + pr_debug("spu_new: failed to map %pOF resource 2\n", + np); + goto out_unmap; + } + if (!firmware_has_feature(FW_FEATURE_LPAR)) + ret = spu_map_resource(spu, 3, + (void __iomem**)&spu->priv1, NULL); + if (ret) { + pr_debug("spu_new: failed to map %pOF resource 3\n", + np); + goto out_unmap; + } + pr_debug("spu_new: %pOF maps:\n", np); + pr_debug(" local store : 0x%016lx -> 0x%p\n", + spu->local_store_phys, spu->local_store); + pr_debug(" problem state : 0x%016lx -> 0x%p\n", + spu->problem_phys, spu->problem); + pr_debug(" priv2 : 0x%p\n", spu->priv2); + pr_debug(" priv1 : 0x%p\n", spu->priv1); + + return 0; + +out_unmap: + spu_unmap(spu); +out: + pr_debug("failed to map spe %s: %d\n", spu->name, ret); + return ret; +} + +static int __init of_enumerate_spus(int (*fn)(void *data)) +{ + int ret; + struct device_node *node; + unsigned int n = 0; + + ret = -ENODEV; + for_each_node_by_type(node, "spe") { + ret = fn(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %pOFn\n", + __func__, node); + of_node_put(node); + break; + } + n++; + } + return ret ? ret : n; +} + +static int __init of_create_spu(struct spu *spu, void *data) +{ + int ret; + struct device_node *spe = (struct device_node *)data; + static int legacy_map = 0, legacy_irq = 0; + + spu->devnode = of_node_get(spe); + spu->spe_id = find_spu_unit_number(spe); + + spu->node = of_node_to_nid(spe); + if (spu->node >= MAX_NUMNODES) { + printk(KERN_WARNING "SPE %pOF on node %d ignored," + " node number too big\n", spe, spu->node); + printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); + ret = -ENODEV; + goto out; + } + + ret = spu_map_device(spu); + if (ret) { + if (!legacy_map) { + legacy_map = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying to map old style\n", __func__); + } + ret = spu_map_device_old(spu); + if (ret) { + printk(KERN_ERR "Unable to map %s\n", + spu->name); + goto out; + } + } + + ret = spu_map_interrupts(spu, spe); + if (ret) { + if (!legacy_irq) { + legacy_irq = 1; + printk(KERN_WARNING "%s: Legacy device tree found, " + "trying old style irq\n", __func__); + } + ret = spu_map_interrupts_old(spu, spe); + if (ret) { + printk(KERN_ERR "%s: could not map interrupts\n", + spu->name); + goto out_unmap; + } + } + + pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name, + spu->local_store, spu->problem, spu->priv1, + spu->priv2, spu->number); + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int of_destroy_spu(struct spu *spu) +{ + spu_unmap(spu); + of_node_put(spu->devnode); + return 0; +} + +static void enable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_start(ctx); +} + +static void disable_spu_by_master_run(struct spu_context *ctx) +{ + ctx->ops->master_stop(ctx); +} + +/* Hardcoded affinity idxs for qs20 */ +#define QS20_SPES_PER_BE 8 +static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; +static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; + +static struct spu *__init spu_lookup_reg(int node, u32 reg) +{ + struct spu *spu; + const u32 *spu_reg; + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + spu_reg = of_get_property(spu_devnode(spu), "reg", NULL); + if (*spu_reg == reg) + return spu; + } + return NULL; +} + +static void __init init_affinity_qs20_harcoded(void) +{ + int node, i; + struct spu *last_spu, *spu; + u32 reg; + + for (node = 0; node < MAX_NUMNODES; node++) { + last_spu = NULL; + for (i = 0; i < QS20_SPES_PER_BE; i++) { + reg = qs20_reg_idxs[i]; + spu = spu_lookup_reg(node, reg); + if (!spu) + continue; + spu->has_mem_affinity = qs20_reg_memory[reg]; + if (last_spu) + list_add_tail(&spu->aff_list, + &last_spu->aff_list); + last_spu = spu; + } + } +} + +static int __init of_has_vicinity(void) +{ + struct device_node *dn; + + for_each_node_by_type(dn, "spe") { + if (of_property_present(dn, "vicinity")) { + of_node_put(dn); + return 1; + } + } + return 0; +} + +static struct spu *__init devnode_spu(int cbe, struct device_node *dn) +{ + struct spu *spu; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) + if (spu_devnode(spu) == dn) + return spu; + return NULL; +} + +static struct spu * __init +neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) +{ + struct spu *spu; + struct device_node *spu_dn; + const phandle *vic_handles; + int lenp, i; + + list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) { + spu_dn = spu_devnode(spu); + if (spu_dn == avoid) + continue; + vic_handles = of_get_property(spu_dn, "vicinity", &lenp); + for (i=0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == target->phandle) + return spu; + } + } + return NULL; +} + +static void __init init_affinity_node(int cbe) +{ + struct spu *spu, *last_spu; + struct device_node *vic_dn, *last_spu_dn; + phandle avoid_ph; + const phandle *vic_handles; + int lenp, i, added; + + last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu, + cbe_list); + avoid_ph = 0; + for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) { + last_spu_dn = spu_devnode(last_spu); + vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp); + + /* + * Walk through each phandle in vicinity property of the spu + * (typically two vicinity phandles per spe node) + */ + for (i = 0; i < (lenp / sizeof(phandle)); i++) { + if (vic_handles[i] == avoid_ph) + continue; + + vic_dn = of_find_node_by_phandle(vic_handles[i]); + if (!vic_dn) + continue; + + if (of_node_name_eq(vic_dn, "spe") ) { + spu = devnode_spu(cbe, vic_dn); + avoid_ph = last_spu_dn->phandle; + } else { + /* + * "mic-tm" and "bif0" nodes do not have + * vicinity property. So we need to find the + * spe which has vic_dn as neighbour, but + * skipping the one we came from (last_spu_dn) + */ + spu = neighbour_spu(cbe, vic_dn, last_spu_dn); + if (!spu) + continue; + if (of_node_name_eq(vic_dn, "mic-tm")) { + last_spu->has_mem_affinity = 1; + spu->has_mem_affinity = 1; + } + avoid_ph = vic_dn->phandle; + } + + of_node_put(vic_dn); + + list_add_tail(&spu->aff_list, &last_spu->aff_list); + last_spu = spu; + break; + } + } +} + +static void __init init_affinity_fw(void) +{ + int cbe; + + for (cbe = 0; cbe < MAX_NUMNODES; cbe++) + init_affinity_node(cbe); +} + +static int __init init_affinity(void) +{ + if (of_has_vicinity()) { + init_affinity_fw(); + } else { + if (of_machine_is_compatible("IBM,CPBW-1.0")) + init_affinity_qs20_harcoded(); + else + printk("No affinity configuration found\n"); + } + + return 0; +} + +const struct spu_management_ops spu_management_of_ops = { + .enumerate_spus = of_enumerate_spus, + .create_spu = of_create_spu, + .destroy_spu = of_destroy_spu, + .enable_spu = enable_spu_by_master_run, + .disable_spu = disable_spu_by_master_run, + .init_affinity = init_affinity, +}; diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c new file mode 100644 index 0000000000..d150e39873 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * spu hypervisor abstraction for direct hardware access. + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "interrupt.h" +#include "spu_priv1_mmio.h" + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = in_be64(&spu->priv1->int_mask_RW[class]); + out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + out_be64(&spu->priv1->int_mask_RW[class], mask); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_mask_RW[class]); +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + out_be64(&spu->priv1->int_stat_RW[class], stat); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + return in_be64(&spu->priv1->int_stat_RW[class]); +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + u64 target; + u64 route; + + if (nr_cpus_node(spu->node)) { + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); + + if (!cpumask_intersects(spumask, cpumask)) + return; + } + + target = iic_get_target_id(cpu); + route = target << 48 | target << 32 | target << 16; + out_be64(&spu->priv1->int_route_RW, route); +} + +static u64 mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dar_RW); +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_dsisr_RW); +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + out_be64(&spu->priv1->mfc_sr1_RW, sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_sr1_RW); +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + return in_be64(&spu->priv1->mfc_tclass_id_RW); +} + +static void tlb_invalidate(struct spu *spu) +{ + out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + out_be64(&spu->priv1->resource_allocation_groupID_RW, id); +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_groupID_RW); +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + out_be64(&spu->priv1->resource_allocation_enable_RW, enable); +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + return in_be64(&spu->priv1->resource_allocation_enable_RW); +} + +const struct spu_priv1_ops spu_priv1_mmio_ops = +{ + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h new file mode 100644 index 0000000000..04f0db339d --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * spu hypervisor abstraction for direct hardware access. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#ifndef SPU_PRIV1_MMIO_H +#define SPU_PRIV1_MMIO_H + +struct device_node *spu_devnode(struct spu *spu); + +#endif /* SPU_PRIV1_MMIO_H */ diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c new file mode 100644 index 0000000000..87ad7d563c --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SPU file system -- system call stubs + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * (C) Copyright 2006-2007, IBM Corporation + * + * Author: Arnd Bergmann + */ +#include +#include +#include +#include +#include +#include + +#include + +/* protected by rcu */ +static struct spufs_calls *spufs_calls; + +#ifdef CONFIG_SPU_FS_MODULE + +static inline struct spufs_calls *spufs_calls_get(void) +{ + struct spufs_calls *calls = NULL; + + rcu_read_lock(); + calls = rcu_dereference(spufs_calls); + if (calls && !try_module_get(calls->owner)) + calls = NULL; + rcu_read_unlock(); + + return calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) +{ + BUG_ON(calls != spufs_calls); + + /* we don't need to rcu this, as we hold a reference to the module */ + module_put(spufs_calls->owner); +} + +#else /* !defined CONFIG_SPU_FS_MODULE */ + +static inline struct spufs_calls *spufs_calls_get(void) +{ + return spufs_calls; +} + +static inline void spufs_calls_put(struct spufs_calls *calls) { } + +#endif /* CONFIG_SPU_FS_MODULE */ + +SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, + umode_t, mode, int, neighbor_fd) +{ + long ret; + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + struct fd neighbor = fdget(neighbor_fd); + ret = -EBADF; + if (neighbor.file) { + ret = calls->create_thread(name, flags, mode, neighbor.file); + fdput(neighbor); + } + } else + ret = calls->create_thread(name, flags, mode, NULL); + + spufs_calls_put(calls); + return ret; +} + +SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus) +{ + long ret; + struct fd arg; + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return -ENOSYS; + + ret = -EBADF; + arg = fdget(fd); + if (arg.file) { + ret = calls->spu_run(arg.file, unpc, ustatus); + fdput(arg); + } + + spufs_calls_put(calls); + return ret; +} + +#ifdef CONFIG_COREDUMP +int elf_coredump_extra_notes_size(void) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_size(); + + spufs_calls_put(calls); + + return ret; +} + +int elf_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spufs_calls *calls; + int ret; + + calls = spufs_calls_get(); + if (!calls) + return 0; + + ret = calls->coredump_extra_notes_write(cprm); + + spufs_calls_put(calls); + + return ret; +} +#endif + +void notify_spus_active(void) +{ + struct spufs_calls *calls; + + calls = spufs_calls_get(); + if (!calls) + return; + + calls->notify_spus_active(); + spufs_calls_put(calls); + + return; +} + +int register_spu_syscalls(struct spufs_calls *calls) +{ + if (spufs_calls) + return -EBUSY; + + rcu_assign_pointer(spufs_calls, calls); + return 0; +} +EXPORT_SYMBOL_GPL(register_spu_syscalls); + +void unregister_spu_syscalls(struct spufs_calls *calls) +{ + BUG_ON(spufs_calls->owner != calls->owner); + RCU_INIT_POINTER(spufs_calls, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(unregister_spu_syscalls); diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore new file mode 100644 index 0000000000..5f3eb224f6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +spu_save_dump.h +spu_restore_dump.h diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile new file mode 100644 index 0000000000..52e4c80ec8 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_SPU_FS) += spufs.o +spufs-y += inode.o file.o context.o syscalls.o +spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o +spufs-y += switch.o fault.o lscsa_alloc.o +spufs-$(CONFIG_COREDUMP) += coredump.o + +# magic for the trace events +CFLAGS_sched.o := -I$(src) + +# Rules to build switch.o with the help of SPU tool chain +SPU_CROSS := spu- +SPU_CC := $(SPU_CROSS)gcc +SPU_AS := $(SPU_CROSS)gcc +SPU_LD := $(SPU_CROSS)ld +SPU_OBJCOPY := $(SPU_CROSS)objcopy +SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__ +SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__ +SPU_LDFLAGS := -N -Ttext=0x0 + +$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h +clean-files := spu_save_dump.h spu_restore_dump.h + +# Compile SPU files + cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $< +quiet_cmd_spu_cc = SPU_CC $@ +$(obj)/spu_%.o: $(src)/spu_%.c + $(call if_changed,spu_cc) + +# Assemble SPU files + cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $< +quiet_cmd_spu_as = SPU_AS $@ +$(obj)/spu_%.o: $(src)/spu_%.S + $(call if_changed,spu_as) + +# Link SPU Executables + cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^ +quiet_cmd_spu_ld = SPU_LD $@ +$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o + $(call if_changed,spu_ld) + +# Copy into binary format + cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@ +quiet_cmd_spu_objcopy = OBJCOPY $@ +$(obj)/spu_%.bin: $(src)/spu_% + $(call if_changed,spu_objcopy) + +# create C code from ELF executable +cmd_hexdump = ( \ + echo "/*" ; \ + echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \ + echo " * Hex-dump auto generated from $*.c." ; \ + echo " * Do not edit!" ; \ + echo " */" ; \ + echo "static unsigned int $*_code[] " \ + "__attribute__((__aligned__(128))) = {" ; \ + hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \ + echo "};" ; \ + ) > $@ +quiet_cmd_hexdump = HEXDUMP $@ +$(obj)/%_dump.h: $(obj)/%.bin + $(call if_changed,hexdump) diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c new file mode 100644 index 0000000000..28a34a2caa --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* backing_ops.c - query/set operations on saved SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter + * + * These register operations allow SPUFS to operate on saved + * SPU contexts rather than hardware. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "spufs.h" + +/* + * Reads/writes to various problem and priv2 registers require + * state changes, i.e. generate SPU events, modify channel + * counts, etc. + */ + +static void gen_spu_event(struct spu_context *ctx, u32 event) +{ + u64 ch0_cnt; + u64 ch0_data; + u64 ch1_data; + + ch0_cnt = ctx->csa.spu_chnlcnt_RW[0]; + ch0_data = ctx->csa.spu_chnldata_RW[0]; + ch1_data = ctx->csa.spu_chnldata_RW[1]; + ctx->csa.spu_chnldata_RW[0] |= event; + if ((ch0_cnt == 0) && !(ch0_data & event) && (ch1_data & event)) { + ctx->csa.spu_chnlcnt_RW[0] = 1; + } +} + +static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data) +{ + u32 mbox_stat; + int ret = 0; + + spin_lock(&ctx->csa.register_lock); + mbox_stat = ctx->csa.prob.mb_stat_R; + if (mbox_stat & 0x0000ff) { + /* Read the first available word. + * Implementation note: the depth + * of pu_mb_R is currently 1. + */ + *data = ctx->csa.prob.pu_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0x0000ff); + ctx->csa.spu_chnlcnt_RW[28] = 1; + gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_mbox_stat_read(struct spu_context *ctx) +{ + return ctx->csa.prob.mb_stat_R; +} + +static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx, + __poll_t events) +{ + __poll_t ret; + u32 stat; + + ret = 0; + spin_lock_irq(&ctx->csa.register_lock); + stat = ctx->csa.prob.mb_stat_R; + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (EPOLLIN | EPOLLRDNORM)) { + if (stat & 0xff0000) + ret |= EPOLLIN | EPOLLRDNORM; + else { + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_INTR; + } + } + if (events & (EPOLLOUT | EPOLLWRNORM)) { + if (stat & 0x00ff00) + ret = EPOLLOUT | EPOLLWRNORM; + else { + ctx->csa.priv1.int_stat_class2_RW &= + ~CLASS2_MAILBOX_THRESHOLD_INTR; + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; + } + } + spin_unlock_irq(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.prob.mb_stat_R & 0xff0000) { + /* Read the first available word. + * Implementation note: the depth + * of puint_mb_R is currently 1. + */ + *data = ctx->csa.priv2.puint_mb_R; + ctx->csa.prob.mb_stat_R &= ~(0xff0000); + ctx->csa.spu_chnlcnt_RW[30] = 1; + gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + ctx->csa.priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static int spu_backing_wbox_write(struct spu_context *ctx, u32 data) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + if ((ctx->csa.prob.mb_stat_R) & 0x00ff00) { + int slot = ctx->csa.spu_chnlcnt_RW[29]; + int avail = (ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8; + + /* We have space to write wbox_data. + * Implementation note: the depth + * of spu_mb_W is currently 4. + */ + BUG_ON(avail != (4 - slot)); + ctx->csa.spu_mailbox_data[slot] = data; + ctx->csa.spu_chnlcnt_RW[29] = ++slot; + ctx->csa.prob.mb_stat_R &= ~(0x00ff00); + ctx->csa.prob.mb_stat_R |= (((4 - slot) & 0xff) << 8); + gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + ctx->csa.priv1.int_mask_class2_RW |= + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR; + ret = 0; + } + spin_unlock(&ctx->csa.register_lock); + return ret; +} + +static u32 spu_backing_signal1_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[3]; +} + +static void spu_backing_signal1_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x1) + ctx->csa.spu_chnldata_RW[3] |= data; + else + ctx->csa.spu_chnldata_RW[3] = data; + ctx->csa.spu_chnlcnt_RW[3] = 1; + gen_spu_event(ctx, MFC_SIGNAL_1_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static u32 spu_backing_signal2_read(struct spu_context *ctx) +{ + return ctx->csa.spu_chnldata_RW[4]; +} + +static void spu_backing_signal2_write(struct spu_context *ctx, u32 data) +{ + spin_lock(&ctx->csa.register_lock); + if (ctx->csa.priv2.spu_cfg_RW & 0x2) + ctx->csa.spu_chnldata_RW[4] |= data; + else + ctx->csa.spu_chnldata_RW[4] = data; + ctx->csa.spu_chnlcnt_RW[4] = 1; + gen_spu_event(ctx, MFC_SIGNAL_2_EVENT); + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 1; + else + tmp &= ~1; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal1_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 1) != 0); +} + +static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val) +{ + u64 tmp; + + spin_lock(&ctx->csa.register_lock); + tmp = ctx->csa.priv2.spu_cfg_RW; + if (val) + tmp |= 2; + else + tmp &= ~2; + ctx->csa.priv2.spu_cfg_RW = tmp; + spin_unlock(&ctx->csa.register_lock); +} + +static u64 spu_backing_signal2_type_get(struct spu_context *ctx) +{ + return ((ctx->csa.priv2.spu_cfg_RW & 2) != 0); +} + +static u32 spu_backing_npc_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_npc_RW; +} + +static void spu_backing_npc_write(struct spu_context *ctx, u32 val) +{ + ctx->csa.prob.spu_npc_RW = val; +} + +static u32 spu_backing_status_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_status_R; +} + +static char *spu_backing_get_ls(struct spu_context *ctx) +{ + return ctx->csa.lscsa->ls; +} + +static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val) +{ + ctx->csa.priv2.spu_privcntl_RW = val; +} + +static u32 spu_backing_runcntl_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_runcntl_RW; +} + +static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock(&ctx->csa.register_lock); + ctx->csa.prob.spu_runcntl_RW = val; + if (val & SPU_RUNCNTL_RUNNABLE) { + ctx->csa.prob.spu_status_R &= + ~SPU_STATUS_STOPPED_BY_STOP & + ~SPU_STATUS_STOPPED_BY_HALT & + ~SPU_STATUS_SINGLE_STEP & + ~SPU_STATUS_INVALID_INSTR & + ~SPU_STATUS_INVALID_CH; + ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING; + } else { + ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING; + } + spin_unlock(&ctx->csa.register_lock); +} + +static void spu_backing_runcntl_stop(struct spu_context *ctx) +{ + spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); +} + +static void spu_backing_master_start(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static void spu_backing_master_stop(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, + u32 mode) +{ + struct spu_problem_collapsed *prob = &ctx->csa.prob; + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + if (prob->dma_querytype_RW) + goto out; + ret = 0; + /* FIXME: what are the side-effects of this? */ + prob->dma_querymask_RW = mask; + prob->dma_querytype_RW = mode; + /* In the current implementation, the SPU context is always + * acquired in runnable state when new bits are added to the + * mask (tagwait), so it's sufficient just to mask + * dma_tagstatus_R with the 'mask' parameter here. + */ + ctx->csa.prob.dma_tagstatus_R &= mask; +out: + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx) +{ + return ctx->csa.prob.dma_tagstatus_R; +} + +static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx) +{ + return ctx->csa.prob.dma_qstatus_R; +} + +static int spu_backing_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + /* FIXME: set up priv2->puq */ + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static void spu_backing_restart_dma(struct spu_context *ctx) +{ + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; +} + +struct spu_context_ops spu_backing_ops = { + .mbox_read = spu_backing_mbox_read, + .mbox_stat_read = spu_backing_mbox_stat_read, + .mbox_stat_poll = spu_backing_mbox_stat_poll, + .ibox_read = spu_backing_ibox_read, + .wbox_write = spu_backing_wbox_write, + .signal1_read = spu_backing_signal1_read, + .signal1_write = spu_backing_signal1_write, + .signal2_read = spu_backing_signal2_read, + .signal2_write = spu_backing_signal2_write, + .signal1_type_set = spu_backing_signal1_type_set, + .signal1_type_get = spu_backing_signal1_type_get, + .signal2_type_set = spu_backing_signal2_type_set, + .signal2_type_get = spu_backing_signal2_type_get, + .npc_read = spu_backing_npc_read, + .npc_write = spu_backing_npc_write, + .status_read = spu_backing_status_read, + .get_ls = spu_backing_get_ls, + .privcntl_write = spu_backing_privcntl_write, + .runcntl_read = spu_backing_runcntl_read, + .runcntl_write = spu_backing_runcntl_write, + .runcntl_stop = spu_backing_runcntl_stop, + .master_start = spu_backing_master_start, + .master_stop = spu_backing_master_stop, + .set_mfc_query = spu_backing_set_mfc_query, + .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, + .get_mfc_free_elements = spu_backing_get_mfc_free_elements, + .send_mfc_command = spu_backing_send_mfc_command, + .restart_dma = spu_backing_restart_dma, +}; diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c new file mode 100644 index 0000000000..7a39cc414f --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SPU file system -- SPU context management + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "spufs.h" +#include "sputrace.h" + + +atomic_t nr_spu_contexts = ATOMIC_INIT(0); + +struct spu_context *alloc_spu_context(struct spu_gang *gang) +{ + struct spu_context *ctx; + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) + goto out; + /* Binding to physical processor deferred + * until spu_activate(). + */ + if (spu_init_csa(&ctx->csa)) + goto out_free; + spin_lock_init(&ctx->mmio_lock); + mutex_init(&ctx->mapping_lock); + kref_init(&ctx->kref); + mutex_init(&ctx->state_mutex); + mutex_init(&ctx->run_mutex); + init_waitqueue_head(&ctx->ibox_wq); + init_waitqueue_head(&ctx->wbox_wq); + init_waitqueue_head(&ctx->stop_wq); + init_waitqueue_head(&ctx->mfc_wq); + init_waitqueue_head(&ctx->run_wq); + ctx->state = SPU_STATE_SAVED; + ctx->ops = &spu_backing_ops; + ctx->owner = get_task_mm(current); + INIT_LIST_HEAD(&ctx->rq); + INIT_LIST_HEAD(&ctx->aff_list); + if (gang) + spu_gang_add_ctx(gang, ctx); + + __spu_update_sched_info(ctx); + spu_set_timeslice(ctx); + ctx->stats.util_state = SPU_UTIL_IDLE_LOADED; + ctx->stats.tstamp = ktime_get_ns(); + + atomic_inc(&nr_spu_contexts); + goto out; +out_free: + kfree(ctx); + ctx = NULL; +out: + return ctx; +} + +void destroy_spu_context(struct kref *kref) +{ + struct spu_context *ctx; + ctx = container_of(kref, struct spu_context, kref); + spu_context_nospu_trace(destroy_spu_context__enter, ctx); + mutex_lock(&ctx->state_mutex); + spu_deactivate(ctx); + mutex_unlock(&ctx->state_mutex); + spu_fini_csa(&ctx->csa); + if (ctx->gang) + spu_gang_remove_ctx(ctx->gang, ctx); + if (ctx->prof_priv_kref) + kref_put(ctx->prof_priv_kref, ctx->prof_priv_release); + BUG_ON(!list_empty(&ctx->rq)); + atomic_dec(&nr_spu_contexts); + kfree(ctx->switch_log); + kfree(ctx); +} + +struct spu_context * get_spu_context(struct spu_context *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} + +int put_spu_context(struct spu_context *ctx) +{ + return kref_put(&ctx->kref, &destroy_spu_context); +} + +/* give up the mm reference when the context is about to be destroyed */ +void spu_forget(struct spu_context *ctx) +{ + struct mm_struct *mm; + + /* + * This is basically an open-coded spu_acquire_saved, except that + * we don't acquire the state mutex interruptible, and we don't + * want this context to be rescheduled on release. + */ + mutex_lock(&ctx->state_mutex); + if (ctx->state != SPU_STATE_SAVED) + spu_deactivate(ctx); + + mm = ctx->owner; + ctx->owner = NULL; + mmput(mm); + spu_release(ctx); +} + +void spu_unmap_mappings(struct spu_context *ctx) +{ + mutex_lock(&ctx->mapping_lock); + if (ctx->local_store) + unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); + if (ctx->mfc) + unmap_mapping_range(ctx->mfc, 0, SPUFS_MFC_MAP_SIZE, 1); + if (ctx->cntl) + unmap_mapping_range(ctx->cntl, 0, SPUFS_CNTL_MAP_SIZE, 1); + if (ctx->signal1) + unmap_mapping_range(ctx->signal1, 0, SPUFS_SIGNAL_MAP_SIZE, 1); + if (ctx->signal2) + unmap_mapping_range(ctx->signal2, 0, SPUFS_SIGNAL_MAP_SIZE, 1); + if (ctx->mss) + unmap_mapping_range(ctx->mss, 0, SPUFS_MSS_MAP_SIZE, 1); + if (ctx->psmap) + unmap_mapping_range(ctx->psmap, 0, SPUFS_PS_MAP_SIZE, 1); + mutex_unlock(&ctx->mapping_lock); +} + +/** + * spu_acquire_saved - lock spu contex and make sure it is in saved state + * @ctx: spu contex to lock + */ +int spu_acquire_saved(struct spu_context *ctx) +{ + int ret; + + spu_context_nospu_trace(spu_acquire_saved__enter, ctx); + + ret = spu_acquire(ctx); + if (ret) + return ret; + + if (ctx->state != SPU_STATE_SAVED) { + set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags); + spu_deactivate(ctx); + } + + return 0; +} + +/** + * spu_release_saved - unlock spu context and return it to the runqueue + * @ctx: context to unlock + */ +void spu_release_saved(struct spu_context *ctx) +{ + BUG_ON(ctx->state != SPU_STATE_SAVED); + + if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags) && + test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_activate(ctx, 0); + + spu_release(ctx); +} + diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c new file mode 100644 index 0000000000..1a58761801 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SPU core dump code + * + * (C) Copyright 2006 IBM Corp. + * + * Author: Dwayne Grant McConnell + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "spufs.h" + +static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) +{ + int i, sz, total = 0; + char *name; + char fullname[80]; + + for (i = 0; spufs_coredump_read[i].name != NULL; i++) { + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + + total += sizeof(struct elf_note); + total += roundup(strlen(fullname) + 1, 4); + total += roundup(sz, 4); + } + + return total; +} + +static int match_context(const void *v, struct file *file, unsigned fd) +{ + struct spu_context *ctx; + if (file->f_op != &spufs_context_fops) + return 0; + ctx = SPUFS_I(file_inode(file))->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + return 0; + return fd + 1; +} + +/* + * The additional architecture-specific notes for Cell are various + * context files in the spu context. + * + * This function iterates over all open file descriptors and sees + * if they are a directory in spufs. In that case we use spufs + * internal functionality to dump them without needing to actually + * open the files. + */ +/* + * descriptor table is not shared, so files can't change or go away. + */ +static struct spu_context *coredump_next_context(int *fd) +{ + struct spu_context *ctx; + struct file *file; + int n = iterate_fd(current->files, *fd, match_context, NULL); + if (!n) + return NULL; + *fd = n - 1; + + rcu_read_lock(); + file = lookup_fd_rcu(*fd); + ctx = SPUFS_I(file_inode(file))->i_ctx; + get_spu_context(ctx); + rcu_read_unlock(); + + return ctx; +} + +int spufs_coredump_extra_notes_size(void) +{ + struct spu_context *ctx; + int size = 0, rc, fd; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) { + put_spu_context(ctx); + break; + } + + rc = spufs_ctx_note_size(ctx, fd); + spu_release_saved(ctx); + if (rc < 0) { + put_spu_context(ctx); + break; + } + + size += rc; + + /* start searching the next fd next time */ + fd++; + put_spu_context(ctx); + } + + return size; +} + +static int spufs_arch_write_note(struct spu_context *ctx, int i, + struct coredump_params *cprm, int dfd) +{ + size_t sz = spufs_coredump_read[i].size; + char fullname[80]; + struct elf_note en; + int ret; + + sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name); + en.n_namesz = strlen(fullname) + 1; + en.n_descsz = sz; + en.n_type = NT_SPU; + + if (!dump_emit(cprm, &en, sizeof(en))) + return -EIO; + if (!dump_emit(cprm, fullname, en.n_namesz)) + return -EIO; + if (!dump_align(cprm, 4)) + return -EIO; + + if (spufs_coredump_read[i].dump) { + ret = spufs_coredump_read[i].dump(ctx, cprm); + if (ret < 0) + return ret; + } else { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "0x%.16llx", + spufs_coredump_read[i].get(ctx)); + if (ret >= sizeof(buf)) + return sizeof(buf); + + /* count trailing the NULL: */ + if (!dump_emit(cprm, buf, ret + 1)) + return -EIO; + } + + dump_skip_to(cprm, roundup(cprm->pos - ret + sz, 4)); + return 0; +} + +int spufs_coredump_extra_notes_write(struct coredump_params *cprm) +{ + struct spu_context *ctx; + int fd, j, rc; + + fd = 0; + while ((ctx = coredump_next_context(&fd)) != NULL) { + rc = spu_acquire_saved(ctx); + if (rc) + return rc; + + for (j = 0; spufs_coredump_read[j].name != NULL; j++) { + rc = spufs_arch_write_note(ctx, j, cprm, fd); + if (rc) { + spu_release_saved(ctx); + return rc; + } + } + + spu_release_saved(ctx); + + /* start searching the next fd next time */ + fd++; + } + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c new file mode 100644 index 0000000000..24adbe3c60 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Low-level SPU handling + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ +#include +#include + +#include +#include + +#include "spufs.h" + +/** + * Handle an SPE event, depending on context SPU_CREATE_EVENTS_ENABLED flag. + * + * If the context was created with events, we just set the return event. + * Otherwise, send an appropriate signal to the process. + */ +static void spufs_handle_event(struct spu_context *ctx, + unsigned long ea, int type) +{ + if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) { + ctx->event_return |= type; + wake_up_all(&ctx->stop_wq); + return; + } + + switch (type) { + case SPE_EVENT_INVALID_DMA: + force_sig_fault(SIGBUS, BUS_OBJERR, NULL); + break; + case SPE_EVENT_SPE_DATA_STORAGE: + ctx->ops->restart_dma(ctx); + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea); + break; + case SPE_EVENT_DMA_ALIGNMENT: + /* DAR isn't set for an alignment fault :( */ + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + break; + case SPE_EVENT_SPE_ERROR: + force_sig_fault( + SIGILL, ILL_ILLOPC, + (void __user *)(unsigned long) + ctx->ops->npc_read(ctx) - 4); + break; + } +} + +int spufs_handle_class0(struct spu_context *ctx) +{ + unsigned long stat = ctx->csa.class_0_pending & CLASS0_INTR_MASK; + + if (likely(!stat)) + return 0; + + if (stat & CLASS0_DMA_ALIGNMENT_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_DMA_ALIGNMENT); + + if (stat & CLASS0_INVALID_DMA_COMMAND_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_INVALID_DMA); + + if (stat & CLASS0_SPU_ERROR_INTR) + spufs_handle_event(ctx, ctx->csa.class_0_dar, + SPE_EVENT_SPE_ERROR); + + ctx->csa.class_0_pending = 0; + + return -EIO; +} + +/* + * bottom half handler for page faults, we can't do this from + * interrupt context, since we might need to sleep. + * we also need to give up the mutex so we can get scheduled + * out while waiting for the backing store. + * + * TODO: try calling hash_page from the interrupt handler first + * in order to speed up the easy case. + */ +int spufs_handle_class1(struct spu_context *ctx) +{ + u64 ea, dsisr, access; + unsigned long flags; + vm_fault_t flt = 0; + int ret; + + /* + * dar and dsisr get passed from the registers + * to the spu_context, to this function, but not + * back to the spu if it gets scheduled again. + * + * if we don't handle the fault for a saved context + * in time, we can still expect to get the same fault + * the immediately after the context restore. + */ + ea = ctx->csa.class_1_dar; + dsisr = ctx->csa.class_1_dsisr; + + if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) + return 0; + + spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); + + pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea, + dsisr, ctx->state); + + ctx->stats.hash_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) + ctx->spu->stats.hash_flt++; + + /* we must not hold the lock when entering copro_handle_mm_fault */ + spu_release(ctx); + + access = (_PAGE_PRESENT | _PAGE_READ); + access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL; + local_irq_save(flags); + ret = hash_page(ea, access, 0x300, dsisr); + local_irq_restore(flags); + + /* hashing failed, so try the actual fault handler */ + if (ret) + ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt); + + /* + * This is nasty: we need the state_mutex for all the bookkeeping even + * if the syscall was interrupted by a signal. ewww. + */ + mutex_lock(&ctx->state_mutex); + + /* + * Clear dsisr under ctxt lock after handling the fault, so that + * time slicing will not preempt the context while the page fault + * handler is running. Context switch code removes mappings. + */ + ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0; + + /* + * If we handled the fault successfully and are in runnable + * state, restart the DMA. + * In case of unhandled error report the problem to user space. + */ + if (!ret) { + if (flt & VM_FAULT_MAJOR) + ctx->stats.maj_flt++; + else + ctx->stats.min_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + if (flt & VM_FAULT_MAJOR) + ctx->spu->stats.maj_flt++; + else + ctx->spu->stats.min_flt++; + } + + if (ctx->spu) + ctx->ops->restart_dma(ctx); + } else + spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + return ret; +} diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c new file mode 100644 index 0000000000..02a8158c46 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -0,0 +1,2633 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SPU file system -- file contents + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "spufs.h" +#include "sputrace.h" + +#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + +/* Simple attribute files */ +struct spufs_attr { + int (*get)(void *, u64 *); + int (*set)(void *, u64); + char get_buf[24]; /* enough to store a u64 and "\n\0" */ + char set_buf[24]; + void *data; + const char *fmt; /* format for read operation */ + struct mutex mutex; /* protects access to these buffers */ +}; + +static int spufs_attr_open(struct inode *inode, struct file *file, + int (*get)(void *, u64 *), int (*set)(void *, u64), + const char *fmt) +{ + struct spufs_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->get = get; + attr->set = set; + attr->data = inode->i_private; + attr->fmt = fmt; + mutex_init(&attr->mutex); + file->private_data = attr; + + return nonseekable_open(inode, file); +} + +static int spufs_attr_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t spufs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->get) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + if (*ppos) { /* continued read */ + size = strlen(attr->get_buf); + } else { /* first read */ + u64 val; + ret = attr->get(attr->data, &val); + if (ret) + goto out; + + size = scnprintf(attr->get_buf, sizeof(attr->get_buf), + attr->fmt, (unsigned long long)val); + } + + ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +static ssize_t spufs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct spufs_attr *attr; + u64 val; + size_t size; + ssize_t ret; + + attr = file->private_data; + if (!attr->set) + return -EACCES; + + ret = mutex_lock_interruptible(&attr->mutex); + if (ret) + return ret; + + ret = -EFAULT; + size = min(sizeof(attr->set_buf) - 1, len); + if (copy_from_user(attr->set_buf, buf, size)) + goto out; + + ret = len; /* claim we got the whole input */ + attr->set_buf[size] = '\0'; + val = simple_strtol(attr->set_buf, NULL, 0); + attr->set(attr->data, val); +out: + mutex_unlock(&attr->mutex); + return ret; +} + +static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf, + size_t size) +{ + if (!dump_emit(cprm, buf, size)) + return -EIO; + return size; +} + +#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return spufs_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .open = __fops ## _open, \ + .release = spufs_attr_release, \ + .read = spufs_attr_read, \ + .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ +}; + + +static int +spufs_mem_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->local_store = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static int +spufs_mem_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->local_store = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t +spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm) +{ + return spufs_dump_emit(cprm, ctx->ops->get_ls(ctx), LS_SIZE); +} + +static ssize_t +spufs_mem_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ret = simple_read_from_buffer(buffer, size, pos, ctx->ops->get_ls(ctx), + LS_SIZE); + spu_release(ctx); + + return ret; +} + +static ssize_t +spufs_mem_write(struct file *file, const char __user *buffer, + size_t size, loff_t *ppos) +{ + struct spu_context *ctx = file->private_data; + char *local_store; + loff_t pos = *ppos; + int ret; + + if (pos > LS_SIZE) + return -EFBIG; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + local_store = ctx->ops->get_ls(ctx); + size = simple_write_to_buffer(local_store, LS_SIZE, ppos, buffer, size); + spu_release(ctx); + + return size; +} + +static vm_fault_t +spufs_mem_mmap_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long pfn, offset; + vm_fault_t ret; + + offset = vmf->pgoff << PAGE_SHIFT; + if (offset >= LS_SIZE) + return VM_FAULT_SIGBUS; + + pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n", + vmf->address, offset); + + if (spu_acquire(ctx)) + return VM_FAULT_NOPAGE; + + if (ctx->state == SPU_STATE_SAVED) { + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset); + } else { + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT; + } + ret = vmf_insert_pfn(vma, vmf->address, pfn); + + spu_release(ctx); + + return ret; +} + +static int spufs_mem_mmap_access(struct vm_area_struct *vma, + unsigned long address, + void *buf, int len, int write) +{ + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + char *local_store; + + if (write && !(vma->vm_flags & VM_WRITE)) + return -EACCES; + if (spu_acquire(ctx)) + return -EINTR; + if ((offset + len) > vma->vm_end) + len = vma->vm_end - offset; + local_store = ctx->ops->get_ls(ctx); + if (write) + memcpy_toio(local_store + offset, buf, len); + else + memcpy_fromio(buf, local_store + offset, len); + spu_release(ctx); + return len; +} + +static const struct vm_operations_struct spufs_mem_mmap_vmops = { + .fault = spufs_mem_mmap_fault, + .access = spufs_mem_mmap_access, +}; + +static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); + + vma->vm_ops = &spufs_mem_mmap_vmops; + return 0; +} + +static const struct file_operations spufs_mem_fops = { + .open = spufs_mem_open, + .release = spufs_mem_release, + .read = spufs_mem_read, + .write = spufs_mem_write, + .llseek = generic_file_llseek, + .mmap = spufs_mem_mmap, +}; + +static vm_fault_t spufs_ps_fault(struct vm_fault *vmf, + unsigned long ps_offs, + unsigned long ps_size) +{ + struct spu_context *ctx = vmf->vma->vm_file->private_data; + unsigned long area, offset = vmf->pgoff << PAGE_SHIFT; + int err = 0; + vm_fault_t ret = VM_FAULT_NOPAGE; + + spu_context_nospu_trace(spufs_ps_fault__enter, ctx); + + if (offset >= ps_size) + return VM_FAULT_SIGBUS; + + if (fatal_signal_pending(current)) + return VM_FAULT_SIGBUS; + + /* + * Because we release the mmap_lock, the context may be destroyed while + * we're in spu_wait. Grab an extra reference so it isn't destroyed + * in the meantime. + */ + get_spu_context(ctx); + + /* + * We have to wait for context to be loaded before we have + * pages to hand out to the user, but we don't want to wait + * with the mmap_lock held. + * It is possible to drop the mmap_lock here, but then we need + * to return VM_FAULT_NOPAGE because the mappings may have + * hanged. + */ + if (spu_acquire(ctx)) + goto refault; + + if (ctx->state == SPU_STATE_SAVED) { + mmap_read_unlock(current->mm); + spu_context_nospu_trace(spufs_ps_fault__sleep, ctx); + err = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu); + mmap_read_lock(current->mm); + } else { + area = ctx->spu->problem_phys + ps_offs; + ret = vmf_insert_pfn(vmf->vma, vmf->address, + (area + offset) >> PAGE_SHIFT); + spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu); + } + + if (!err) + spu_release(ctx); + +refault: + put_spu_context(ctx); + return ret; +} + +#if SPUFS_MMAP_4K +static vm_fault_t spufs_cntl_mmap_fault(struct vm_fault *vmf) +{ + return spufs_ps_fault(vmf, 0x4000, SPUFS_CNTL_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_cntl_mmap_vmops = { + .fault = spufs_cntl_mmap_fault, +}; + +/* + * mmap support for problem state control area [0x4000 - 0x4fff]. + */ +static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_cntl_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_cntl_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_cntl_get(void *data, u64 *val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + *val = ctx->ops->status_read(ctx); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->runcntl_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static int spufs_cntl_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->cntl = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return simple_attr_open(inode, file, spufs_cntl_get, + spufs_cntl_set, "0x%08lx"); +} + +static int +spufs_cntl_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + simple_attr_release(inode, file); + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->cntl = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_cntl_fops = { + .open = spufs_cntl_open, + .release = spufs_cntl_release, + .read = simple_attr_read, + .write = simple_attr_write, + .llseek = no_llseek, + .mmap = spufs_cntl_mmap, +}; + +static int +spufs_regs_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + return 0; +} + +static ssize_t +spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm) +{ + return spufs_dump_emit(cprm, ctx->csa.lscsa->gprs, + sizeof(ctx->csa.lscsa->gprs)); +} + +static ssize_t +spufs_regs_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + /* pre-check for file position: if we'd return EOF, there's no point + * causing a deschedule */ + if (*pos >= sizeof(ctx->csa.lscsa->gprs)) + return 0; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = simple_read_from_buffer(buffer, size, pos, ctx->csa.lscsa->gprs, + sizeof(ctx->csa.lscsa->gprs)); + spu_release_saved(ctx); + return ret; +} + +static ssize_t +spufs_regs_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + if (*pos >= sizeof(lscsa->gprs)) + return -EFBIG; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + + size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos, + buffer, size); + + spu_release_saved(ctx); + return size; +} + +static const struct file_operations spufs_regs_fops = { + .open = spufs_regs_open, + .read = spufs_regs_read, + .write = spufs_regs_write, + .llseek = generic_file_llseek, +}; + +static ssize_t +spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm) +{ + return spufs_dump_emit(cprm, &ctx->csa.lscsa->fpcr, + sizeof(ctx->csa.lscsa->fpcr)); +} + +static ssize_t +spufs_fpcr_read(struct file *file, char __user * buffer, + size_t size, loff_t * pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = simple_read_from_buffer(buffer, size, pos, &ctx->csa.lscsa->fpcr, + sizeof(ctx->csa.lscsa->fpcr)); + spu_release_saved(ctx); + return ret; +} + +static ssize_t +spufs_fpcr_write(struct file *file, const char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + if (*pos >= sizeof(lscsa->fpcr)) + return -EFBIG; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + + size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos, + buffer, size); + + spu_release_saved(ctx); + return size; +} + +static const struct file_operations spufs_fpcr_fops = { + .open = spufs_regs_open, + .read = spufs_fpcr_read, + .write = spufs_fpcr_write, + .llseek = generic_file_llseek, +}; + +/* generic open function for all pipe-like files */ +static int spufs_pipe_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + file->private_data = i->i_ctx; + + return stream_open(inode, file); +} + +/* + * Read as many bytes from the mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + */ +static ssize_t spufs_mbox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 mbox_data, __user *udata = (void __user *)buf; + ssize_t count; + + if (len < 4) + return -EINVAL; + + count = spu_acquire(ctx); + if (count) + return count; + + for (count = 0; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->mbox_read(ctx, &mbox_data); + if (ret == 0) + break; + + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. + */ + ret = put_user(mbox_data, udata); + if (ret) { + if (!count) + count = -EFAULT; + break; + } + } + spu_release(ctx); + + if (!count) + count = -EAGAIN; + + return count; +} + +static const struct file_operations spufs_mbox_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_read, + .llseek = no_llseek, +}; + +static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 mbox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff; + + spu_release(ctx); + + if (copy_to_user(buf, &mbox_stat, sizeof mbox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_mbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_mbox_stat_read, + .llseek = no_llseek, +}; + +/* low-level ibox access function */ +size_t spu_ibox_read(struct spu_context *ctx, u32 *data) +{ + return ctx->ops->ibox_read(ctx, data); +} + +/* interrupt-level ibox callback function. */ +void spufs_ibox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (ctx) + wake_up_all(&ctx->ibox_wq); +} + +/* + * Read as many bytes from the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - no more data available in the mailbox + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * any data is available, but return when we have been able to + * read something. + */ +static ssize_t spufs_ibox_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 ibox_data, __user *udata = (void __user *)buf; + ssize_t count; + + if (len < 4) + return -EINVAL; + + count = spu_acquire(ctx); + if (count) + goto out; + + /* wait only for the first element */ + count = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_ibox_read(ctx, &ibox_data)) { + count = -EAGAIN; + goto out_unlock; + } + } else { + count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data)); + if (count) + goto out; + } + + /* if we can't write at all, return -EFAULT */ + count = put_user(ibox_data, udata); + if (count) + goto out_unlock; + + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = ctx->ops->ibox_read(ctx, &ibox_data); + if (ret == 0) + break; + /* + * at the end of the mapped area, we can fault + * but still need to return the data we have + * read successfully so far. + */ + ret = put_user(ibox_data, udata); + if (ret) + break; + } + +out_unlock: + spu_release(ctx); +out: + return count; +} + +static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + __poll_t mask; + + poll_wait(file, &ctx->ibox_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + mask = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM); + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_ibox_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_read, + .poll = spufs_ibox_poll, + .llseek = no_llseek, +}; + +static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 ibox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &ibox_stat, sizeof ibox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_ibox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_ibox_stat_read, + .llseek = no_llseek, +}; + +/* low-level mailbox write */ +size_t spu_wbox_write(struct spu_context *ctx, u32 data) +{ + return ctx->ops->wbox_write(ctx, data); +} + +/* interrupt-level wbox callback function. */ +void spufs_wbox_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (ctx) + wake_up_all(&ctx->wbox_wq); +} + +/* + * Write as many bytes to the interrupt mailbox as possible, until + * one of the conditions becomes true: + * + * - the mailbox is full + * - end of the user provided buffer + * - end of the mapped area + * + * If the file is opened without O_NONBLOCK, we wait here until + * space is available, but return when we have been able to + * write something. + */ +static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 wbox_data, __user *udata = (void __user *)buf; + ssize_t count; + + if (len < 4) + return -EINVAL; + + if (get_user(wbox_data, udata)) + return -EFAULT; + + count = spu_acquire(ctx); + if (count) + goto out; + + /* + * make sure we can at least write one element, by waiting + * in case of !O_NONBLOCK + */ + count = 0; + if (file->f_flags & O_NONBLOCK) { + if (!spu_wbox_write(ctx, wbox_data)) { + count = -EAGAIN; + goto out_unlock; + } + } else { + count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data)); + if (count) + goto out; + } + + + /* write as much as possible */ + for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { + int ret; + ret = get_user(wbox_data, udata); + if (ret) + break; + + ret = spu_wbox_write(ctx, wbox_data); + if (ret == 0) + break; + } + +out_unlock: + spu_release(ctx); +out: + return count; +} + +static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + __poll_t mask; + + poll_wait(file, &ctx->wbox_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + mask = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM); + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_wbox_fops = { + .open = spufs_pipe_open, + .write = spufs_wbox_write, + .poll = spufs_wbox_poll, + .llseek = no_llseek, +}; + +static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + ssize_t ret; + u32 wbox_stat; + + if (len < 4) + return -EINVAL; + + ret = spu_acquire(ctx); + if (ret) + return ret; + wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff; + spu_release(ctx); + + if (copy_to_user(buf, &wbox_stat, sizeof wbox_stat)) + return -EFAULT; + + return 4; +} + +static const struct file_operations spufs_wbox_stat_fops = { + .open = spufs_pipe_open, + .read = spufs_wbox_stat_read, + .llseek = no_llseek, +}; + +static int spufs_signal1_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal1 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal1_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal1 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t spufs_signal1_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + if (!ctx->csa.spu_chnlcnt_RW[3]) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3], + sizeof(ctx->csa.spu_chnldata_RW[3])); +} + +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, + size_t len) +{ + if (len < sizeof(ctx->csa.spu_chnldata_RW[3])) + return -EINVAL; + if (!ctx->csa.spu_chnlcnt_RW[3]) + return 0; + if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3], + sizeof(ctx->csa.spu_chnldata_RW[3]))) + return -EFAULT; + return sizeof(ctx->csa.spu_chnldata_RW[3]); +} + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal1_read(ctx, buf, len); + spu_release_saved(ctx); + + return ret; +} + +static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + ssize_t ret; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal1_write(ctx, data); + spu_release(ctx); + + return 4; +} + +static vm_fault_t +spufs_signal1_mmap_fault(struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vmf, 0x14000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal1_mmap_vmops = { + .fault = spufs_signal1_mmap_fault, +}; + +static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal1_mmap_vmops; + return 0; +} + +static const struct file_operations spufs_signal1_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .read = spufs_signal1_read, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal1_nosched_fops = { + .open = spufs_signal1_open, + .release = spufs_signal1_release, + .write = spufs_signal1_write, + .mmap = spufs_signal1_mmap, + .llseek = no_llseek, +}; + +static int spufs_signal2_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->signal2 = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_signal2_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->signal2 = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static ssize_t spufs_signal2_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + if (!ctx->csa.spu_chnlcnt_RW[4]) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4], + sizeof(ctx->csa.spu_chnldata_RW[4])); +} + +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, + size_t len) +{ + if (len < sizeof(ctx->csa.spu_chnldata_RW[4])) + return -EINVAL; + if (!ctx->csa.spu_chnlcnt_RW[4]) + return 0; + if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4], + sizeof(ctx->csa.spu_chnldata_RW[4]))) + return -EFAULT; + return sizeof(ctx->csa.spu_chnldata_RW[4]); +} + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + ret = __spufs_signal2_read(ctx, buf, len); + spu_release_saved(ctx); + + return ret; +} + +static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx; + ssize_t ret; + u32 data; + + ctx = file->private_data; + + if (len < 4) + return -EINVAL; + + if (copy_from_user(&data, buf, 4)) + return -EFAULT; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_write(ctx, data); + spu_release(ctx); + + return 4; +} + +#if SPUFS_MMAP_4K +static vm_fault_t +spufs_signal2_mmap_fault(struct vm_fault *vmf) +{ +#if SPUFS_SIGNAL_MAP_SIZE == 0x1000 + return spufs_ps_fault(vmf, 0x1c000, SPUFS_SIGNAL_MAP_SIZE); +#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000 + /* For 64k pages, both signal1 and signal2 can be used to mmap the whole + * signal 1 and 2 area + */ + return spufs_ps_fault(vmf, 0x10000, SPUFS_SIGNAL_MAP_SIZE); +#else +#error unsupported page size +#endif +} + +static const struct vm_operations_struct spufs_signal2_mmap_vmops = { + .fault = spufs_signal2_mmap_fault, +}; + +static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_signal2_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_signal2_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static const struct file_operations spufs_signal2_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .read = spufs_signal2_read, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +static const struct file_operations spufs_signal2_nosched_fops = { + .open = spufs_signal2_open, + .release = spufs_signal2_release, + .write = spufs_signal2_write, + .mmap = spufs_signal2_mmap, + .llseek = no_llseek, +}; + +/* + * This is a wrapper around DEFINE_SIMPLE_ATTRIBUTE which does the + * work of acquiring (or not) the SPU context before calling through + * to the actual get routine. The set routine is called directly. + */ +#define SPU_ATTR_NOACQUIRE 0 +#define SPU_ATTR_ACQUIRE 1 +#define SPU_ATTR_ACQUIRE_SAVED 2 + +#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \ +static int __##__get(void *data, u64 *val) \ +{ \ + struct spu_context *ctx = data; \ + int ret = 0; \ + \ + if (__acquire == SPU_ATTR_ACQUIRE) { \ + ret = spu_acquire(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release(ctx); \ + } else if (__acquire == SPU_ATTR_ACQUIRE_SAVED) { \ + ret = spu_acquire_saved(ctx); \ + if (ret) \ + return ret; \ + *val = __get(ctx); \ + spu_release_saved(ctx); \ + } else \ + *val = __get(ctx); \ + \ + return 0; \ +} \ +DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt); + +static int spufs_signal1_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal1_type_set(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_signal1_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal1_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get, + spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + + +static int spufs_signal2_type_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->signal2_type_set(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_signal2_type_get(struct spu_context *ctx) +{ + return ctx->ops->signal2_type_get(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, + spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE); + +#if SPUFS_MMAP_4K +static vm_fault_t +spufs_mss_mmap_fault(struct vm_fault *vmf) +{ + return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_mss_mmap_vmops = { + .fault = spufs_mss_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. + */ +static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_mss_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_mss_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_mss_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + file->private_data = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!i->i_openers++) + ctx->mss = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_mss_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->mss = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_mss_fops = { + .open = spufs_mss_open, + .release = spufs_mss_release, + .mmap = spufs_mss_mmap, + .llseek = no_llseek, +}; + +static vm_fault_t +spufs_psmap_mmap_fault(struct vm_fault *vmf) +{ + return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_psmap_mmap_vmops = { + .fault = spufs_psmap_mmap_fault, +}; + +/* + * mmap support for full problem state area [0x00000 - 0x1ffff]. + */ +static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_psmap_mmap_vmops; + return 0; +} + +static int spufs_psmap_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + file->private_data = i->i_ctx; + if (!i->i_openers++) + ctx->psmap = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_psmap_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->psmap = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +static const struct file_operations spufs_psmap_fops = { + .open = spufs_psmap_open, + .release = spufs_psmap_release, + .mmap = spufs_psmap_mmap, + .llseek = no_llseek, +}; + + +#if SPUFS_MMAP_4K +static vm_fault_t +spufs_mfc_mmap_fault(struct vm_fault *vmf) +{ + return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE); +} + +static const struct vm_operations_struct spufs_mfc_mmap_vmops = { + .fault = spufs_mfc_mmap_fault, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. + */ +static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vm_flags_set(vma, VM_IO | VM_PFNMAP); + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &spufs_mfc_mmap_vmops; + return 0; +} +#else /* SPUFS_MMAP_4K */ +#define spufs_mfc_mmap NULL +#endif /* !SPUFS_MMAP_4K */ + +static int spufs_mfc_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + /* we don't want to deal with DMA into other processes */ + if (ctx->owner != current->mm) + return -EINVAL; + + if (atomic_read(&inode->i_count) != 1) + return -EBUSY; + + mutex_lock(&ctx->mapping_lock); + file->private_data = ctx; + if (!i->i_openers++) + ctx->mfc = inode->i_mapping; + mutex_unlock(&ctx->mapping_lock); + return nonseekable_open(inode, file); +} + +static int +spufs_mfc_release(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + mutex_lock(&ctx->mapping_lock); + if (!--i->i_openers) + ctx->mfc = NULL; + mutex_unlock(&ctx->mapping_lock); + return 0; +} + +/* interrupt-level mfc callback function. */ +void spufs_mfc_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + if (ctx) + wake_up_all(&ctx->mfc_wq); +} + +static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status) +{ + /* See if there is one tag group is complete */ + /* FIXME we need locking around tagwait */ + *status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait; + ctx->tagwait &= ~*status; + if (*status) + return 1; + + /* enable interrupt waiting for any tag group, + may silently fail if interrupts are already enabled */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + return 0; +} + +static ssize_t spufs_mfc_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret = -EINVAL; + u32 status; + + if (size != 4) + goto out; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + ret = -EINVAL; + if (file->f_flags & O_NONBLOCK) { + status = ctx->ops->read_mfc_tagstatus(ctx); + if (!(status & ctx->tagwait)) + ret = -EAGAIN; + else + /* XXX(hch): shouldn't we clear ret here? */ + ctx->tagwait &= ~status; + } else { + ret = spufs_wait(ctx->mfc_wq, + spufs_read_mfc_tagstatus(ctx, &status)); + if (ret) + goto out; + } + spu_release(ctx); + + ret = 4; + if (copy_to_user(buffer, &status, 4)) + ret = -EFAULT; + +out: + return ret; +} + +static int spufs_check_valid_dma(struct mfc_dma_command *cmd) +{ + pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa, + cmd->ea, cmd->size, cmd->tag, cmd->cmd); + + switch (cmd->cmd) { + case MFC_PUT_CMD: + case MFC_PUTF_CMD: + case MFC_PUTB_CMD: + case MFC_GET_CMD: + case MFC_GETF_CMD: + case MFC_GETB_CMD: + break; + default: + pr_debug("invalid DMA opcode %x\n", cmd->cmd); + return -EIO; + } + + if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) { + pr_debug("invalid DMA alignment, ea %llx lsa %x\n", + cmd->ea, cmd->lsa); + return -EIO; + } + + switch (cmd->size & 0xf) { + case 1: + break; + case 2: + if (cmd->lsa & 1) + goto error; + break; + case 4: + if (cmd->lsa & 3) + goto error; + break; + case 8: + if (cmd->lsa & 7) + goto error; + break; + case 0: + if (cmd->lsa & 15) + goto error; + break; + error: + default: + pr_debug("invalid DMA alignment %x for size %x\n", + cmd->lsa & 0xf, cmd->size); + return -EIO; + } + + if (cmd->size > 16 * 1024) { + pr_debug("invalid DMA size %x\n", cmd->size); + return -EIO; + } + + if (cmd->tag & 0xfff0) { + /* we reserve the higher tag numbers for kernel use */ + pr_debug("invalid DMA tag\n"); + return -EIO; + } + + if (cmd->class) { + /* not supported in this version */ + pr_debug("invalid DMA class\n"); + return -EIO; + } + + return 0; +} + +static int spu_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command cmd, + int *error) +{ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) { + /* wait for any tag group to complete + so we have space for the new command */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + /* try again, because the queue might be + empty again */ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) + return 0; + } + return 1; +} + +static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct mfc_dma_command cmd; + int ret = -EINVAL; + + if (size != sizeof cmd) + goto out; + + ret = -EFAULT; + if (copy_from_user(&cmd, buffer, sizeof cmd)) + goto out; + + ret = spufs_check_valid_dma(&cmd); + if (ret) + goto out; + + ret = spu_acquire(ctx); + if (ret) + goto out; + + ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE); + if (ret) + goto out; + + if (file->f_flags & O_NONBLOCK) { + ret = ctx->ops->send_mfc_command(ctx, &cmd); + } else { + int status; + ret = spufs_wait(ctx->mfc_wq, + spu_send_mfc_command(ctx, cmd, &status)); + if (ret) + goto out; + if (status) + ret = status; + } + + if (ret) + goto out_unlock; + + ctx->tagwait |= 1 << cmd.tag; + ret = size; + +out_unlock: + spu_release(ctx); +out: + return ret; +} + +static __poll_t spufs_mfc_poll(struct file *file,poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + u32 free_elements, tagstatus; + __poll_t mask; + + poll_wait(file, &ctx->mfc_wq, wait); + + /* + * For now keep this uninterruptible and also ignore the rule + * that poll should not sleep. Will be fixed later. + */ + mutex_lock(&ctx->state_mutex); + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + spu_release(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= EPOLLOUT | EPOLLWRNORM; + if (tagstatus & ctx->tagwait) + mask |= EPOLLIN | EPOLLRDNORM; + + pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__, + free_elements, tagstatus, ctx->tagwait); + + return mask; +} + +static int spufs_mfc_flush(struct file *file, fl_owner_t id) +{ + struct spu_context *ctx = file->private_data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + goto out; +#if 0 +/* this currently hangs */ + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); + if (ret) + goto out; + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); + if (ret) + goto out; +#else + ret = 0; +#endif + spu_release(ctx); +out: + return ret; +} + +static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + struct inode *inode = file_inode(file); + int err = file_write_and_wait_range(file, start, end); + if (!err) { + inode_lock(inode); + err = spufs_mfc_flush(file, NULL); + inode_unlock(inode); + } + return err; +} + +static const struct file_operations spufs_mfc_fops = { + .open = spufs_mfc_open, + .release = spufs_mfc_release, + .read = spufs_mfc_read, + .write = spufs_mfc_write, + .poll = spufs_mfc_poll, + .flush = spufs_mfc_flush, + .fsync = spufs_mfc_fsync, + .mmap = spufs_mfc_mmap, + .llseek = no_llseek, +}; + +static int spufs_npc_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + ctx->ops->npc_write(ctx, val); + spu_release(ctx); + + return 0; +} + +static u64 spufs_npc_get(struct spu_context *ctx) +{ + return ctx->ops->npc_read(ctx); +} +DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n", SPU_ATTR_ACQUIRE); + +static int spufs_decr_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->decr.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->decr.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_decr_status_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + if (val) + ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING; + else + ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_decr_status_get(struct spu_context *ctx) +{ + if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) + return SPU_DECR_STATUS_RUNNING; + else + return 0; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, + spufs_decr_status_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_event_mask_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->event_mask.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_event_mask_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->event_mask.slot[0]; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static u64 spufs_event_status_get(struct spu_context *ctx) +{ + struct spu_state *state = &ctx->csa; + u64 stat; + stat = state->spu_chnlcnt_RW[0]; + if (stat) + return state->spu_chnldata_RW[0]; + return 0; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static int spufs_srr0_set(void *data, u64 val) +{ + struct spu_context *ctx = data; + struct spu_lscsa *lscsa = ctx->csa.lscsa; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + lscsa->srr0.slot[0] = (u32) val; + spu_release_saved(ctx); + + return 0; +} + +static u64 spufs_srr0_get(struct spu_context *ctx) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->srr0.slot[0]; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, + "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED) + +static u64 spufs_id_get(struct spu_context *ctx) +{ + u64 num; + + if (ctx->state == SPU_STATE_RUNNABLE) + num = ctx->spu->number; + else + num = (unsigned int)-1; + + return num; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE) + +static u64 spufs_object_id_get(struct spu_context *ctx) +{ + /* FIXME: Should there really be no locking here? */ + return ctx->object_id; +} + +static int spufs_object_id_set(void *data, u64 id) +{ + struct spu_context *ctx = data; + ctx->object_id = id; + + return 0; +} + +DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, + spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE); + +static u64 spufs_lslr_get(struct spu_context *ctx) +{ + return ctx->csa.priv2.spu_lslr_RW; +} +DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n", + SPU_ATTR_ACQUIRE_SAVED); + +static int spufs_info_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + return 0; +} + +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t spufs_mbox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.prob.pu_mb_R, + sizeof(ctx->csa.prob.pu_mb_R)); +} + +static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 stat, data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.prob.pu_mb_R; + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + /* EOF if there's no entry in the mbox */ + if (!(stat & 0x0000ff)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); +} + +static const struct file_operations spufs_mbox_info_fops = { + .open = spufs_info_open, + .read = spufs_mbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t spufs_ibox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.priv2.puint_mb_R, + sizeof(ctx->csa.priv2.puint_mb_R)); +} + +static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 stat, data; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.priv2.puint_mb_R; + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + /* EOF if there's no entry in the ibox */ + if (!(stat & 0xff0000)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); +} + +static const struct file_operations spufs_ibox_info_fops = { + .open = spufs_info_open, + .read = spufs_ibox_info_read, + .llseek = generic_file_llseek, +}; + +static size_t spufs_wbox_info_cnt(struct spu_context *ctx) +{ + return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32); +} + +static ssize_t spufs_wbox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data, + spufs_wbox_info_cnt(ctx)); +} + +static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)]; + int ret, count; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + count = spufs_wbox_info_cnt(ctx); + memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data)); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return simple_read_from_buffer(buf, len, pos, &data, + count * sizeof(u32)); +} + +static const struct file_operations spufs_wbox_info_fops = { + .open = spufs_info_open, + .read = spufs_wbox_info_read, + .llseek = generic_file_llseek, +}; + +static void spufs_get_dma_info(struct spu_context *ctx, + struct spu_dma_info *info) +{ + int i; + + info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info->dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + for (i = 0; i < 16; i++) { + struct mfc_cq_sr *qp = &info->dma_info_command_data[i]; + struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i]; + + qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; + } +} + +static ssize_t spufs_dma_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + struct spu_dma_info info; + + spufs_get_dma_info(ctx, &info); + return spufs_dump_emit(cprm, &info, sizeof(info)); +} + +static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_dma_info info; + int ret; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + spufs_get_dma_info(ctx, &info); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); +} + +static const struct file_operations spufs_dma_info_fops = { + .open = spufs_info_open, + .read = spufs_dma_info_read, + .llseek = no_llseek, +}; + +static void spufs_get_proxydma_info(struct spu_context *ctx, + struct spu_proxydma_info *info) +{ + int i; + + info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + + for (i = 0; i < 8; i++) { + struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i]; + struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } +} + +static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + struct spu_proxydma_info info; + + spufs_get_proxydma_info(ctx, &info); + return spufs_dump_emit(cprm, &info, sizeof(info)); +} + +static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct spu_proxydma_info info; + int ret; + + if (len < sizeof(info)) + return -EINVAL; + + ret = spu_acquire_saved(ctx); + if (ret) + return ret; + spin_lock(&ctx->csa.register_lock); + spufs_get_proxydma_info(ctx, &info); + spin_unlock(&ctx->csa.register_lock); + spu_release_saved(ctx); + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); +} + +static const struct file_operations spufs_proxydma_info_fops = { + .open = spufs_info_open, + .read = spufs_proxydma_info_read, + .llseek = no_llseek, +}; + +static int spufs_show_tid(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + seq_printf(s, "%d\n", ctx->tid); + return 0; +} + +static int spufs_tid_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_tid_fops = { + .open = spufs_tid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spu_utilization_state state) +{ + unsigned long long time = ctx->stats.times[state]; + + /* + * In general, utilization statistics are updated by the controlling + * thread as the spu context moves through various well defined + * state transitions, but if the context is lazily loaded its + * utilization statistics are not updated as the controlling thread + * is not tightly coupled with the execution of the spu context. We + * calculate and apply the time delta from the last recorded state + * of the spu context. + */ + if (ctx->spu && ctx->stats.util_state == state) { + time += ktime_get_ns() - ctx->stats.tstamp; + } + + return time / NSEC_PER_MSEC; +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + int ret; + + ret = spu_acquire(ctx); + if (ret) + return ret; + + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.util_state], + spufs_acct_time(ctx, SPU_UTIL_USER), + spufs_acct_time(ctx, SPU_UTIL_SYSTEM), + spufs_acct_time(ctx, SPU_UTIL_IOWAIT), + spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static inline int spufs_switch_log_used(struct spu_context *ctx) +{ + return (ctx->switch_log->head - ctx->switch_log->tail) % + SWITCH_LOG_BUFSIZE; +} + +static inline int spufs_switch_log_avail(struct spu_context *ctx) +{ + return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx); +} + +static int spufs_switch_log_open(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (ctx->switch_log) { + rc = -EBUSY; + goto out; + } + + ctx->switch_log = kmalloc(struct_size(ctx->switch_log, log, + SWITCH_LOG_BUFSIZE), GFP_KERNEL); + + if (!ctx->switch_log) { + rc = -ENOMEM; + goto out; + } + + ctx->switch_log->head = ctx->switch_log->tail = 0; + init_waitqueue_head(&ctx->switch_log->wait); + rc = 0; + +out: + spu_release(ctx); + return rc; +} + +static int spufs_switch_log_release(struct inode *inode, struct file *file) +{ + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int rc; + + rc = spu_acquire(ctx); + if (rc) + return rc; + + kfree(ctx->switch_log); + ctx->switch_log = NULL; + spu_release(ctx); + + return 0; +} + +static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) +{ + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; + + return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n", + (unsigned long long) p->tstamp.tv_sec, + (unsigned int) p->tstamp.tv_nsec, + p->spu_id, + (unsigned int) p->type, + (unsigned int) p->val, + (unsigned long long) p->timebase); +} + +static ssize_t spufs_switch_log_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + int error = 0, cnt = 0; + + if (!buf) + return -EINVAL; + + error = spu_acquire(ctx); + if (error) + return error; + + while (cnt < len) { + char tbuf[128]; + int width; + + if (spufs_switch_log_used(ctx) == 0) { + if (cnt > 0) { + /* If there's data ready to go, we can + * just return straight away */ + break; + + } else if (file->f_flags & O_NONBLOCK) { + error = -EAGAIN; + break; + + } else { + /* spufs_wait will drop the mutex and + * re-acquire, but since we're in read(), the + * file cannot be _released (and so + * ctx->switch_log is stable). + */ + error = spufs_wait(ctx->switch_log->wait, + spufs_switch_log_used(ctx) > 0); + + /* On error, spufs_wait returns without the + * state mutex held */ + if (error) + return error; + + /* We may have had entries read from underneath + * us while we dropped the mutex in spufs_wait, + * so re-check */ + if (spufs_switch_log_used(ctx) == 0) + continue; + } + } + + width = switch_log_sprint(ctx, tbuf, sizeof(tbuf)); + if (width < len) + ctx->switch_log->tail = + (ctx->switch_log->tail + 1) % + SWITCH_LOG_BUFSIZE; + else + /* If the record is greater than space available return + * partial buffer (so far) */ + break; + + error = copy_to_user(buf + cnt, tbuf, width); + if (error) + break; + cnt += width; + } + + spu_release(ctx); + + return cnt == 0 ? error : cnt; +} + +static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait) +{ + struct inode *inode = file_inode(file); + struct spu_context *ctx = SPUFS_I(inode)->i_ctx; + __poll_t mask = 0; + int rc; + + poll_wait(file, &ctx->switch_log->wait, wait); + + rc = spu_acquire(ctx); + if (rc) + return rc; + + if (spufs_switch_log_used(ctx) > 0) + mask |= EPOLLIN; + + spu_release(ctx); + + return mask; +} + +static const struct file_operations spufs_switch_log_fops = { + .open = spufs_switch_log_open, + .read = spufs_switch_log_read, + .poll = spufs_switch_log_poll, + .release = spufs_switch_log_release, + .llseek = no_llseek, +}; + +/** + * Log a context switch event to a switch log reader. + * + * Must be called with ctx->state_mutex held. + */ +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val) +{ + if (!ctx->switch_log) + return; + + if (spufs_switch_log_avail(ctx) > 1) { + struct switch_log_entry *p; + + p = ctx->switch_log->log + ctx->switch_log->head; + ktime_get_ts64(&p->tstamp); + p->timebase = get_tb(); + p->spu_id = spu ? spu->number : -1; + p->type = type; + p->val = val; + + ctx->switch_log->head = + (ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE; + } + + wake_up(&ctx->switch_log->wait); +} + +static int spufs_show_ctx(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + u64 mfc_control_RW; + + mutex_lock(&ctx->state_mutex); + if (ctx->spu) { + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + spin_lock_irq(&spu->register_lock); + mfc_control_RW = in_be64(&priv2->mfc_control_RW); + spin_unlock_irq(&spu->register_lock); + } else { + struct spu_state *csa = &ctx->csa; + + mfc_control_RW = csa->priv2.mfc_control_RW; + } + + seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)" + " %c %llx %llx %llx %llx %x %x\n", + ctx->state == SPU_STATE_SAVED ? 'S' : 'R', + ctx->flags, + ctx->sched_flags, + ctx->prio, + ctx->time_slice, + ctx->spu ? ctx->spu->number : -1, + !list_empty(&ctx->rq) ? 'q' : ' ', + ctx->csa.class_0_pending, + ctx->csa.class_0_dar, + ctx->csa.class_1_dsisr, + mfc_control_RW, + ctx->ops->runcntl_read(ctx), + ctx->ops->status_read(ctx)); + + mutex_unlock(&ctx->state_mutex); + + return 0; +} + +static int spufs_ctx_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_ctx, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_ctx_fops = { + .open = spufs_ctx_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +const struct spufs_tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "regs", &spufs_regs_fops, 0666, sizeof(struct spu_reg128[128]), }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "fpcr", &spufs_fpcr_fops, 0666, sizeof(struct spu_reg128), }, + { "lslr", &spufs_lslr_ops, 0444, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "srr0", &spufs_srr0_ops, 0666, }, + { "decr", &spufs_decr_ops, 0666, }, + { "decr_status", &spufs_decr_status_ops, 0666, }, + { "event_mask", &spufs_event_mask_ops, 0666, }, + { "event_status", &spufs_event_status_ops, 0444, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "mbox_info", &spufs_mbox_info_fops, 0444, sizeof(u32), }, + { "ibox_info", &spufs_ibox_info_fops, 0444, sizeof(u32), }, + { "wbox_info", &spufs_wbox_info_fops, 0444, sizeof(u32), }, + { "dma_info", &spufs_dma_info_fops, 0444, + sizeof(struct spu_dma_info), }, + { "proxydma_info", &spufs_proxydma_info_fops, 0444, + sizeof(struct spu_proxydma_info)}, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + { "switch_log", &spufs_switch_log_fops, 0444 }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, LS_SIZE, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, sizeof(u32), }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, sizeof(u32), }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, sizeof(u32), }, + { "signal1", &spufs_signal1_nosched_fops, 0222, }, + { "signal2", &spufs_signal2_nosched_fops, 0222, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "psmap", &spufs_psmap_fops, 0666, SPUFS_PS_MAP_SIZE, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, + {}, +}; + +const struct spufs_tree_descr spufs_dir_debug_contents[] = { + { ".ctx", &spufs_ctx_fops, 0444, }, + {}, +}; + +const struct spufs_coredump_reader spufs_coredump_read[] = { + { "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) }, + { "lslr", NULL, spufs_lslr_get, 19 }, + { "decr", NULL, spufs_decr_get, 19 }, + { "decr_status", NULL, spufs_decr_status_get, 19 }, + { "mem", spufs_mem_dump, NULL, LS_SIZE, }, + { "signal1", spufs_signal1_dump, NULL, sizeof(u32) }, + { "signal1_type", NULL, spufs_signal1_type_get, 19 }, + { "signal2", spufs_signal2_dump, NULL, sizeof(u32) }, + { "signal2_type", NULL, spufs_signal2_type_get, 19 }, + { "event_mask", NULL, spufs_event_mask_get, 19 }, + { "event_status", NULL, spufs_event_status_get, 19 }, + { "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) }, + { "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) }, + { "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)}, + { "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", spufs_proxydma_info_dump, + NULL, sizeof(struct spu_proxydma_info)}, + { "object-id", NULL, spufs_object_id_get, 19 }, + { "npc", NULL, spufs_npc_get, 19 }, + { NULL }, +}; diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c new file mode 100644 index 0000000000..827d338dea --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/gang.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ + +#include +#include + +#include "spufs.h" + +struct spu_gang *alloc_spu_gang(void) +{ + struct spu_gang *gang; + + gang = kzalloc(sizeof *gang, GFP_KERNEL); + if (!gang) + goto out; + + kref_init(&gang->kref); + mutex_init(&gang->mutex); + mutex_init(&gang->aff_mutex); + INIT_LIST_HEAD(&gang->list); + INIT_LIST_HEAD(&gang->aff_list_head); + +out: + return gang; +} + +static void destroy_spu_gang(struct kref *kref) +{ + struct spu_gang *gang; + gang = container_of(kref, struct spu_gang, kref); + WARN_ON(gang->contexts || !list_empty(&gang->list)); + kfree(gang); +} + +struct spu_gang *get_spu_gang(struct spu_gang *gang) +{ + kref_get(&gang->kref); + return gang; +} + +int put_spu_gang(struct spu_gang *gang) +{ + return kref_put(&gang->kref, &destroy_spu_gang); +} + +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + ctx->gang = get_spu_gang(gang); + list_add(&ctx->gang_list, &gang->list); + gang->contexts++; + mutex_unlock(&gang->mutex); +} + +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx) +{ + mutex_lock(&gang->mutex); + WARN_ON(ctx->gang != gang); + if (!list_empty(&ctx->aff_list)) { + list_del_init(&ctx->aff_list); + gang->aff_flags &= ~AFF_OFFSETS_SET; + } + list_del_init(&ctx->gang_list); + gang->contexts--; + mutex_unlock(&gang->mutex); + + put_spu_gang(gang); +} diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c new file mode 100644 index 0000000000..8deaf786ed --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* hw_ops.c - query/set operations on active SPU context. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "spufs.h" + +static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + u32 mbox_stat; + int ret = 0; + + spin_lock_irq(&spu->register_lock); + mbox_stat = in_be32(&prob->mb_stat_R); + if (mbox_stat & 0x0000ff) { + *data = in_be32(&prob->pu_mb_R); + ret = 4; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static u32 spu_hw_mbox_stat_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->mb_stat_R); +} + +static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events) +{ + struct spu *spu = ctx->spu; + __poll_t ret = 0; + u32 stat; + + spin_lock_irq(&spu->register_lock); + stat = in_be32(&spu->problem->mb_stat_R); + + /* if the requested event is there, return the poll + mask, otherwise enable the interrupt to get notified, + but first mark any pending interrupts as done so + we don't get woken up unnecessarily */ + + if (events & (EPOLLIN | EPOLLRDNORM)) { + if (stat & 0xff0000) + ret |= EPOLLIN | EPOLLRDNORM; + else { + spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR); + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + } + } + if (events & (EPOLLOUT | EPOLLWRNORM)) { + if (stat & 0x00ff00) + ret = EPOLLOUT | EPOLLWRNORM; + else { + spu_int_stat_clear(spu, 2, + CLASS2_MAILBOX_THRESHOLD_INTR); + spu_int_mask_or(spu, 2, + CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); + } + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0xff0000) { + /* read the first available word */ + *data = in_be64(&priv2->puint_mb_R); + ret = 4; + } else { + /* make sure we get woken up by the interrupt */ + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) +{ + struct spu *spu = ctx->spu; + struct spu_problem __iomem *prob = spu->problem; + int ret; + + spin_lock_irq(&spu->register_lock); + if (in_be32(&prob->mb_stat_R) & 0x00ff00) { + /* we have space to write wbox_data to */ + out_be32(&prob->spu_mb_W, data); + ret = 4; + } else { + /* make sure we get woken up by the interrupt when space + becomes available */ + spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR); + ret = 0; + } + spin_unlock_irq(&spu->register_lock); + return ret; +} + +static void spu_hw_signal1_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify1, data); +} + +static void spu_hw_signal2_write(struct spu_context *ctx, u32 data) +{ + out_be32(&ctx->spu->problem->signal_notify2, data); +} + +static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 1; + else + tmp &= ~1; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal1_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 1) != 0); +} + +static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val) +{ + struct spu *spu = ctx->spu; + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 tmp; + + spin_lock_irq(&spu->register_lock); + tmp = in_be64(&priv2->spu_cfg_RW); + if (val) + tmp |= 2; + else + tmp &= ~2; + out_be64(&priv2->spu_cfg_RW, tmp); + spin_unlock_irq(&spu->register_lock); +} + +static u64 spu_hw_signal2_type_get(struct spu_context *ctx) +{ + return ((in_be64(&ctx->spu->priv2->spu_cfg_RW) & 2) != 0); +} + +static u32 spu_hw_npc_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_npc_RW); +} + +static void spu_hw_npc_write(struct spu_context *ctx, u32 val) +{ + out_be32(&ctx->spu->problem->spu_npc_RW, val); +} + +static u32 spu_hw_status_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_status_R); +} + +static char *spu_hw_get_ls(struct spu_context *ctx) +{ + return ctx->spu->local_store; +} + +static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val) +{ + out_be64(&ctx->spu->priv2->spu_privcntl_RW, val); +} + +static u32 spu_hw_runcntl_read(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->spu_runcntl_RW); +} + +static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) +{ + spin_lock_irq(&ctx->spu->register_lock); + if (val & SPU_RUNCNTL_ISOLATE) + spu_hw_privcntl_write(ctx, + SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK); + out_be32(&ctx->spu->problem->spu_runcntl_RW, val); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static void spu_hw_runcntl_stop(struct spu_context *ctx) +{ + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP); + while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING) + cpu_relax(); + spin_unlock_irq(&ctx->spu->register_lock); +} + +static void spu_hw_master_start(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static void spu_hw_master_stop(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode) +{ + struct spu_problem __iomem *prob = ctx->spu->problem; + int ret; + + spin_lock_irq(&ctx->spu->register_lock); + ret = -EAGAIN; + if (in_be32(&prob->dma_querytype_RW)) + goto out; + ret = 0; + out_be32(&prob->dma_querymask_RW, mask); + out_be32(&prob->dma_querytype_RW, mode); +out: + spin_unlock_irq(&ctx->spu->register_lock); + return ret; +} + +static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx) +{ + return in_be32(&ctx->spu->problem->dma_tagstatus_R); +} + +static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->dma_qstatus_R); +} + +static int spu_hw_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + u32 status; + struct spu_problem __iomem *prob = ctx->spu->problem; + + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&prob->mfc_lsa_W, cmd->lsa); + out_be64(&prob->mfc_ea_W, cmd->ea); + out_be32(&prob->mfc_union_W.by32.mfc_size_tag32, + cmd->size << 16 | cmd->tag); + out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32, + cmd->class << 16 | cmd->cmd); + status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + spin_unlock_irq(&ctx->spu->register_lock); + + switch (status & 0xffff) { + case 0: + return 0; + case 2: + return -EAGAIN; + default: + return -EINVAL; + } +} + +static void spu_hw_restart_dma(struct spu_context *ctx) +{ + struct spu_priv2 __iomem *priv2 = ctx->spu->priv2; + + if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags)) + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND); +} + +struct spu_context_ops spu_hw_ops = { + .mbox_read = spu_hw_mbox_read, + .mbox_stat_read = spu_hw_mbox_stat_read, + .mbox_stat_poll = spu_hw_mbox_stat_poll, + .ibox_read = spu_hw_ibox_read, + .wbox_write = spu_hw_wbox_write, + .signal1_write = spu_hw_signal1_write, + .signal2_write = spu_hw_signal2_write, + .signal1_type_set = spu_hw_signal1_type_set, + .signal1_type_get = spu_hw_signal1_type_get, + .signal2_type_set = spu_hw_signal2_type_set, + .signal2_type_get = spu_hw_signal2_type_get, + .npc_read = spu_hw_npc_read, + .npc_write = spu_hw_npc_write, + .status_read = spu_hw_status_read, + .get_ls = spu_hw_get_ls, + .privcntl_write = spu_hw_privcntl_write, + .runcntl_read = spu_hw_runcntl_read, + .runcntl_write = spu_hw_runcntl_write, + .runcntl_stop = spu_hw_runcntl_stop, + .master_start = spu_hw_master_start, + .master_stop = spu_hw_master_stop, + .set_mfc_query = spu_hw_set_mfc_query, + .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, + .get_mfc_free_elements = spu_hw_get_mfc_free_elements, + .send_mfc_command = spu_hw_send_mfc_command, + .restart_dma = spu_hw_restart_dma, +}; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c new file mode 100644 index 0000000000..38c5be34c8 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -0,0 +1,826 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "spufs.h" + +struct spufs_sb_info { + bool debug; +}; + +static struct kmem_cache *spufs_inode_cache; +char *isolated_loader; +static int isolated_loader_size; + +static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static struct inode * +spufs_alloc_inode(struct super_block *sb) +{ + struct spufs_inode_info *ei; + + ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); + if (!ei) + return NULL; + + ei->i_gang = NULL; + ei->i_ctx = NULL; + ei->i_openers = 0; + + return &ei->vfs_inode; +} + +static void spufs_free_inode(struct inode *inode) +{ + kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); +} + +static void +spufs_init_once(void *p) +{ + struct spufs_inode_info *ei = p; + + inode_init_once(&ei->vfs_inode); +} + +static struct inode * +spufs_new_inode(struct super_block *sb, umode_t mode) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + goto out; + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); +out: + return inode; +} + +static int +spufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +{ + struct inode *inode = d_inode(dentry); + + if ((attr->ia_valid & ATTR_SIZE) && + (attr->ia_size != inode->i_size)) + return -EINVAL; + setattr_copy(&nop_mnt_idmap, inode, attr); + mark_inode_dirty(inode); + return 0; +} + + +static int +spufs_new_file(struct super_block *sb, struct dentry *dentry, + const struct file_operations *fops, umode_t mode, + size_t size, struct spu_context *ctx) +{ + static const struct inode_operations spufs_file_iops = { + .setattr = spufs_setattr, + }; + struct inode *inode; + int ret; + + ret = -ENOSPC; + inode = spufs_new_inode(sb, S_IFREG | mode); + if (!inode) + goto out; + + ret = 0; + inode->i_op = &spufs_file_iops; + inode->i_fop = fops; + inode->i_size = size; + inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); + d_add(dentry, inode); +out: + return ret; +} + +static void +spufs_evict_inode(struct inode *inode) +{ + struct spufs_inode_info *ei = SPUFS_I(inode); + clear_inode(inode); + if (ei->i_ctx) + put_spu_context(ei->i_ctx); + if (ei->i_gang) + put_spu_gang(ei->i_gang); +} + +static void spufs_prune_dir(struct dentry *dir) +{ + struct dentry *dentry, *tmp; + + inode_lock(d_inode(dir)); + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { + spin_lock(&dentry->d_lock); + if (simple_positive(dentry)) { + dget_dlock(dentry); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); + simple_unlink(d_inode(dir), dentry); + /* XXX: what was dcache_lock protecting here? Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ + dput(dentry); + } else { + spin_unlock(&dentry->d_lock); + } + } + shrink_dcache_parent(dir); + inode_unlock(d_inode(dir)); +} + +/* Caller must hold parent->i_mutex */ +static int spufs_rmdir(struct inode *parent, struct dentry *dir) +{ + /* remove all entries */ + int res; + spufs_prune_dir(dir); + d_drop(dir); + res = simple_rmdir(parent, dir); + /* We have to give up the mm_struct */ + spu_forget(SPUFS_I(d_inode(dir))->i_ctx); + return res; +} + +static int spufs_fill_dir(struct dentry *dir, + const struct spufs_tree_descr *files, umode_t mode, + struct spu_context *ctx) +{ + while (files->name && files->name[0]) { + int ret; + struct dentry *dentry = d_alloc_name(dir, files->name); + if (!dentry) + return -ENOMEM; + ret = spufs_new_file(dir->d_sb, dentry, files->ops, + files->mode & mode, files->size, ctx); + if (ret) + return ret; + files++; + } + return 0; +} + +static int spufs_dir_close(struct inode *inode, struct file *file) +{ + struct inode *parent; + struct dentry *dir; + int ret; + + dir = file->f_path.dentry; + parent = d_inode(dir->d_parent); + + inode_lock_nested(parent, I_MUTEX_PARENT); + ret = spufs_rmdir(parent, dir); + inode_unlock(parent); + WARN_ON(ret); + + return dcache_dir_close(inode, file); +} + +const struct file_operations spufs_context_fops = { + .open = dcache_dir_open, + .release = spufs_dir_close, + .llseek = dcache_dir_lseek, + .read = generic_read_dir, + .iterate_shared = dcache_readdir, + .fsync = noop_fsync, +}; +EXPORT_SYMBOL_GPL(spufs_context_fops); + +static int +spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, + umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_context *ctx; + + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + return -ENOSPC; + + inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR); + ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */ + SPUFS_I(inode)->i_ctx = ctx; + if (!ctx) { + iput(inode); + return -ENOSPC; + } + + ctx->flags = flags; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + inode_lock(inode); + + dget(dentry); + inc_nlink(dir); + inc_nlink(inode); + + d_instantiate(dentry, inode); + + if (flags & SPU_CREATE_NOSCHED) + ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, + mode, ctx); + else + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + + if (!ret && spufs_get_sb_info(dir->i_sb)->debug) + ret = spufs_fill_dir(dentry, spufs_dir_debug_contents, + mode, ctx); + + if (ret) + spufs_rmdir(dir, dentry); + + inode_unlock(inode); + + return ret; +} + +static int spufs_context_open(const struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd_flags(0); + if (ret < 0) + return ret; + + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &spufs_context_fops; + fd_install(ret, filp); + return ret; +} + +static struct spu_context * +spufs_assert_affinity(unsigned int flags, struct spu_gang *gang, + struct file *filp) +{ + struct spu_context *tmp, *neighbor, *err; + int count, node; + int aff_supp; + + aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next, + struct spu, cbe_list))->aff_list); + + if (!aff_supp) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_GANG) + return ERR_PTR(-EINVAL); + + if (flags & SPU_CREATE_AFFINITY_MEM && + gang->aff_ref_ctx && + gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM) + return ERR_PTR(-EEXIST); + + if (gang->aff_flags & AFF_MERGED) + return ERR_PTR(-EBUSY); + + neighbor = NULL; + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (!filp || filp->f_op != &spufs_context_fops) + return ERR_PTR(-EINVAL); + + neighbor = get_spu_context( + SPUFS_I(file_inode(filp))->i_ctx); + + if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) && + !list_is_last(&neighbor->aff_list, &gang->aff_list_head) && + !list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + + if (gang != neighbor->gang) { + err = ERR_PTR(-EINVAL); + goto out_put_neighbor; + } + + count = 1; + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + count++; + if (list_empty(&neighbor->aff_list)) + count++; + + for (node = 0; node < MAX_NUMNODES; node++) { + if ((cbe_spu_info[node].n_spus - atomic_read( + &cbe_spu_info[node].reserved_spus)) >= count) + break; + } + + if (node == MAX_NUMNODES) { + err = ERR_PTR(-EEXIST); + goto out_put_neighbor; + } + } + + return neighbor; + +out_put_neighbor: + put_spu_context(neighbor); + return err; +} + +static void +spufs_set_affinity(unsigned int flags, struct spu_context *ctx, + struct spu_context *neighbor) +{ + if (flags & SPU_CREATE_AFFINITY_MEM) + ctx->gang->aff_ref_ctx = ctx; + + if (flags & SPU_CREATE_AFFINITY_SPU) { + if (list_empty(&neighbor->aff_list)) { + list_add_tail(&neighbor->aff_list, + &ctx->gang->aff_list_head); + neighbor->aff_head = 1; + } + + if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head) + || list_entry(neighbor->aff_list.next, struct spu_context, + aff_list)->aff_head) { + list_add(&ctx->aff_list, &neighbor->aff_list); + } else { + list_add_tail(&ctx->aff_list, &neighbor->aff_list); + if (neighbor->aff_head) { + neighbor->aff_head = 0; + ctx->aff_head = 1; + } + } + + if (!ctx->gang->aff_ref_ctx) + ctx->gang->aff_ref_ctx = ctx; + } +} + +static int +spufs_create_context(struct inode *inode, struct dentry *dentry, + struct vfsmount *mnt, int flags, umode_t mode, + struct file *aff_filp) +{ + int ret; + int affinity; + struct spu_gang *gang; + struct spu_context *neighbor; + struct path path = {.mnt = mnt, .dentry = dentry}; + + if ((flags & SPU_CREATE_NOSCHED) && + !capable(CAP_SYS_NICE)) + return -EPERM; + + if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) + == SPU_CREATE_ISOLATE) + return -EINVAL; + + if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) + return -ENODEV; + + gang = NULL; + neighbor = NULL; + affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU); + if (affinity) { + gang = SPUFS_I(inode)->i_gang; + if (!gang) + return -EINVAL; + mutex_lock(&gang->aff_mutex); + neighbor = spufs_assert_affinity(flags, gang, aff_filp); + if (IS_ERR(neighbor)) { + ret = PTR_ERR(neighbor); + goto out_aff_unlock; + } + } + + ret = spufs_mkdir(inode, dentry, flags, mode & 0777); + if (ret) + goto out_aff_unlock; + + if (affinity) { + spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx, + neighbor); + if (neighbor) + put_spu_context(neighbor); + } + + ret = spufs_context_open(&path); + if (ret < 0) + WARN_ON(spufs_rmdir(inode, dentry)); + +out_aff_unlock: + if (affinity) + mutex_unlock(&gang->aff_mutex); + return ret; +} + +static int +spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + int ret; + struct inode *inode; + struct spu_gang *gang; + + ret = -ENOSPC; + inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR); + if (!inode) + goto out; + + ret = 0; + inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR); + gang = alloc_spu_gang(); + SPUFS_I(inode)->i_ctx = NULL; + SPUFS_I(inode)->i_gang = gang; + if (!gang) { + ret = -ENOMEM; + goto out_iput; + } + + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + d_instantiate(dentry, inode); + inc_nlink(dir); + inc_nlink(d_inode(dentry)); + return ret; + +out_iput: + iput(inode); +out: + return ret; +} + +static int spufs_gang_open(const struct path *path) +{ + int ret; + struct file *filp; + + ret = get_unused_fd_flags(0); + if (ret < 0) + return ret; + + /* + * get references for dget and mntget, will be released + * in error path of *_open(). + */ + filp = dentry_open(path, O_RDONLY, current_cred()); + if (IS_ERR(filp)) { + put_unused_fd(ret); + return PTR_ERR(filp); + } + + filp->f_op = &simple_dir_operations; + fd_install(ret, filp); + return ret; +} + +static int spufs_create_gang(struct inode *inode, + struct dentry *dentry, + struct vfsmount *mnt, umode_t mode) +{ + struct path path = {.mnt = mnt, .dentry = dentry}; + int ret; + + ret = spufs_mkgang(inode, dentry, mode & 0777); + if (!ret) { + ret = spufs_gang_open(&path); + if (ret < 0) { + int err = simple_rmdir(inode, dentry); + WARN_ON(err); + } + } + return ret; +} + + +static struct file_system_type spufs_type; + +long spufs_create(const struct path *path, struct dentry *dentry, + unsigned int flags, umode_t mode, struct file *filp) +{ + struct inode *dir = d_inode(path->dentry); + int ret; + + /* check if we are on spufs */ + if (path->dentry->d_sb->s_type != &spufs_type) + return -EINVAL; + + /* don't accept undefined flags */ + if (flags & (~SPU_CREATE_FLAG_ALL)) + return -EINVAL; + + /* only threads can be underneath a gang */ + if (path->dentry != path->dentry->d_sb->s_root) + if ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang) + return -EINVAL; + + mode &= ~current_umask(); + + if (flags & SPU_CREATE_GANG) + ret = spufs_create_gang(dir, dentry, path->mnt, mode); + else + ret = spufs_create_context(dir, dentry, path->mnt, flags, mode, + filp); + if (ret >= 0) + fsnotify_mkdir(dir, dentry); + + return ret; +} + +/* File system initialization */ +struct spufs_fs_context { + kuid_t uid; + kgid_t gid; + umode_t mode; +}; + +enum { + Opt_uid, Opt_gid, Opt_mode, Opt_debug, +}; + +static const struct fs_parameter_spec spufs_fs_parameters[] = { + fsparam_u32 ("gid", Opt_gid), + fsparam_u32oct ("mode", Opt_mode), + fsparam_u32 ("uid", Opt_uid), + fsparam_flag ("debug", Opt_debug), + {} +}; + +static int spufs_show_options(struct seq_file *m, struct dentry *root) +{ + struct spufs_sb_info *sbi = spufs_get_sb_info(root->d_sb); + struct inode *inode = root->d_inode; + + if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, inode->i_uid)); + if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, inode->i_gid)); + if ((inode->i_mode & S_IALLUGO) != 0775) + seq_printf(m, ",mode=%o", inode->i_mode); + if (sbi->debug) + seq_puts(m, ",debug"); + return 0; +} + +static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct spufs_fs_context *ctx = fc->fs_private; + struct spufs_sb_info *sbi = fc->s_fs_info; + struct fs_parse_result result; + kuid_t uid; + kgid_t gid; + int opt; + + opt = fs_parse(fc, spufs_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return invalf(fc, "Unknown uid"); + ctx->uid = uid; + break; + case Opt_gid: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return invalf(fc, "Unknown gid"); + ctx->gid = gid; + break; + case Opt_mode: + ctx->mode = result.uint_32 & S_IALLUGO; + break; + case Opt_debug: + sbi->debug = true; + break; + } + + return 0; +} + +static void spufs_exit_isolated_loader(void) +{ + free_pages((unsigned long) isolated_loader, + get_order(isolated_loader_size)); +} + +static void __init +spufs_init_isolated_loader(void) +{ + struct device_node *dn; + const char *loader; + int size; + + dn = of_find_node_by_path("/spu-isolation"); + if (!dn) + return; + + loader = of_get_property(dn, "loader", &size); + of_node_put(dn); + if (!loader) + return; + + /* the loader must be align on a 16 byte boundary */ + isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size)); + if (!isolated_loader) + return; + + isolated_loader_size = size; + memcpy(isolated_loader, loader, size); + printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); +} + +static int spufs_create_root(struct super_block *sb, struct fs_context *fc) +{ + struct spufs_fs_context *ctx = fc->fs_private; + struct inode *inode; + + if (!spu_management_ops) + return -ENODEV; + + inode = spufs_new_inode(sb, S_IFDIR | ctx->mode); + if (!inode) + return -ENOMEM; + + inode->i_uid = ctx->uid; + inode->i_gid = ctx->gid; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + SPUFS_I(inode)->i_ctx = NULL; + inc_nlink(inode); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + return 0; +} + +static const struct super_operations spufs_ops = { + .alloc_inode = spufs_alloc_inode, + .free_inode = spufs_free_inode, + .statfs = simple_statfs, + .evict_inode = spufs_evict_inode, + .show_options = spufs_show_options, +}; + +static int spufs_fill_super(struct super_block *sb, struct fs_context *fc) +{ + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = SPUFS_MAGIC; + sb->s_op = &spufs_ops; + + return spufs_create_root(sb, fc); +} + +static int spufs_get_tree(struct fs_context *fc) +{ + return get_tree_single(fc, spufs_fill_super); +} + +static void spufs_free_fc(struct fs_context *fc) +{ + kfree(fc->s_fs_info); +} + +static const struct fs_context_operations spufs_context_ops = { + .free = spufs_free_fc, + .parse_param = spufs_parse_param, + .get_tree = spufs_get_tree, +}; + +static int spufs_init_fs_context(struct fs_context *fc) +{ + struct spufs_fs_context *ctx; + struct spufs_sb_info *sbi; + + ctx = kzalloc(sizeof(struct spufs_fs_context), GFP_KERNEL); + if (!ctx) + goto nomem; + + sbi = kzalloc(sizeof(struct spufs_sb_info), GFP_KERNEL); + if (!sbi) + goto nomem_ctx; + + ctx->uid = current_uid(); + ctx->gid = current_gid(); + ctx->mode = 0755; + + fc->fs_private = ctx; + fc->s_fs_info = sbi; + fc->ops = &spufs_context_ops; + return 0; + +nomem_ctx: + kfree(ctx); +nomem: + return -ENOMEM; +} + +static struct file_system_type spufs_type = { + .owner = THIS_MODULE, + .name = "spufs", + .init_fs_context = spufs_init_fs_context, + .parameters = spufs_fs_parameters, + .kill_sb = kill_litter_super, +}; +MODULE_ALIAS_FS("spufs"); + +static int __init spufs_init(void) +{ + int ret; + + ret = -ENODEV; + if (!spu_management_ops) + goto out; + + ret = -ENOMEM; + spufs_inode_cache = kmem_cache_create("spufs_inode_cache", + sizeof(struct spufs_inode_info), 0, + SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, spufs_init_once); + + if (!spufs_inode_cache) + goto out; + ret = spu_sched_init(); + if (ret) + goto out_cache; + ret = register_spu_syscalls(&spufs_calls); + if (ret) + goto out_sched; + ret = register_filesystem(&spufs_type); + if (ret) + goto out_syscalls; + + spufs_init_isolated_loader(); + + return 0; + +out_syscalls: + unregister_spu_syscalls(&spufs_calls); +out_sched: + spu_sched_exit(); +out_cache: + kmem_cache_destroy(spufs_inode_cache); +out: + return ret; +} +module_init(spufs_init); + +static void __exit spufs_exit(void) +{ + spu_sched_exit(); + spufs_exit_isolated_loader(); + unregister_spu_syscalls(&spufs_calls); + unregister_filesystem(&spufs_type); + kmem_cache_destroy(spufs_inode_cache); +} +module_exit(spufs_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnd Bergmann "); + diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c new file mode 100644 index 0000000000..43b9dde7fd --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SPU local store allocation routines + * + * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include + +#include +#include +#include + +#include "spufs.h" + +int spu_alloc_lscsa(struct spu_state *csa) +{ + struct spu_lscsa *lscsa; + unsigned char *p; + + lscsa = vzalloc(sizeof(*lscsa)); + if (!lscsa) + return -ENOMEM; + csa->lscsa = lscsa; + + /* Set LS pages reserved to allow for user-space mapping. */ + for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE) + SetPageReserved(vmalloc_to_page(p)); + + return 0; +} + +void spu_free_lscsa(struct spu_state *csa) +{ + /* Clear reserved bit before vfree. */ + unsigned char *p; + + if (csa->lscsa == NULL) + return; + + for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE) + ClearPageReserved(vmalloc_to_page(p)); + + vfree(csa->lscsa); +} diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c new file mode 100644 index 0000000000..ce52b87496 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: GPL-2.0 +#define DEBUG + +#include +#include + +#include +#include +#include +#include + +#include "spufs.h" + +/* interrupt-level stop callback function. */ +void spufs_stop_callback(struct spu *spu, int irq) +{ + struct spu_context *ctx = spu->ctx; + + /* + * It should be impossible to preempt a context while an exception + * is being processed, since the context switch code is specially + * coded to deal with interrupts ... But, just in case, sanity check + * the context pointer. It is OK to return doing nothing since + * the exception will be regenerated when the context is resumed. + */ + if (ctx) { + /* Copy exception arguments into module specific structure */ + switch(irq) { + case 0 : + ctx->csa.class_0_pending = spu->class_0_pending; + ctx->csa.class_0_dar = spu->class_0_dar; + break; + case 1 : + ctx->csa.class_1_dsisr = spu->class_1_dsisr; + ctx->csa.class_1_dar = spu->class_1_dar; + break; + case 2 : + break; + } + + /* ensure that the exception status has hit memory before a + * thread waiting on the context's stop queue is woken */ + smp_wmb(); + + wake_up_all(&ctx->stop_wq); + } +} + +int spu_stopped(struct spu_context *ctx, u32 *stat) +{ + u64 dsisr; + u32 stopped; + + stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + +top: + *stat = ctx->ops->status_read(ctx); + if (*stat & stopped) { + /* + * If the spu hasn't finished stopping, we need to + * re-read the register to get the stopped value. + */ + if (*stat & SPU_STATUS_RUNNING) + goto top; + return 1; + } + + if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags)) + return 1; + + dsisr = ctx->csa.class_1_dsisr; + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) + return 1; + + if (ctx->csa.class_0_pending) + return 1; + + return 0; +} + +static int spu_setup_isolated(struct spu_context *ctx) +{ + int ret; + u64 __iomem *mfc_cntl; + u64 sr1; + u32 status; + unsigned long timeout; + const u32 status_loading = SPU_STATUS_RUNNING + | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS; + + ret = -ENODEV; + if (!isolated_loader) + goto out; + + /* + * We need to exclude userspace access to the context. + * + * To protect against memory access we invalidate all ptes + * and make sure the pagefault handlers block on the mutex. + */ + spu_unmap_mappings(ctx); + + mfc_cntl = &ctx->spu->priv2->mfc_control_RW; + + /* purge the MFC DMA queue to ensure no spurious accesses before we + * enter kernel mode */ + timeout = jiffies + HZ; + out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST); + while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK) + != MFC_CNTL_PURGE_DMA_COMPLETE) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n", + __func__); + ret = -EIO; + goto out; + } + cond_resched(); + } + + /* clear purge status */ + out_be64(mfc_cntl, 0); + + /* put the SPE in kernel mode to allow access to the loader */ + sr1 = spu_mfc_sr1_get(ctx->spu); + sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + + /* start the loader */ + ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32); + ctx->ops->signal2_write(ctx, + (unsigned long)isolated_loader & 0xffffffff); + + ctx->ops->runcntl_write(ctx, + SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + + ret = 0; + timeout = jiffies + HZ; + while (((status = ctx->ops->status_read(ctx)) & status_loading) == + status_loading) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout waiting for loader\n", + __func__); + ret = -EIO; + goto out_drop_priv; + } + cond_resched(); + } + + if (!(status & SPU_STATUS_RUNNING)) { + /* If isolated LOAD has failed: run SPU, we will get a stop-and + * signal later. */ + pr_debug("%s: isolated LOAD failed\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + ret = -EACCES; + goto out_drop_priv; + } + + if (!(status & SPU_STATUS_ISOLATED_STATE)) { + /* This isn't allowed by the CBEA, but check anyway */ + pr_debug("%s: SPU fell out of isolated mode?\n", __func__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP); + ret = -EINVAL; + goto out_drop_priv; + } + +out_drop_priv: + /* Finished accessing the loader. Drop kernel mode */ + sr1 |= MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + +out: + return ret; +} + +static int spu_run_init(struct spu_context *ctx, u32 *npc) +{ + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; + int ret; + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + /* + * NOSCHED is synchronous scheduling with respect to the caller. + * The caller waits for the context to be loaded. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } + } + + /* + * Apply special setup as required. + */ + if (ctx->flags & SPU_CREATE_ISOLATE) { + if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { + ret = spu_setup_isolated(ctx); + if (ret) + return ret; + } + + /* + * If userspace has set the runcntrl register (eg, to + * issue an isolated exit), we need to re-set it here + */ + runcntl = ctx->ops->runcntl_read(ctx) & + (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + if (runcntl == 0) + runcntl = SPU_RUNCNTL_RUNNABLE; + } else { + unsigned long privcntl; + + if (test_thread_flag(TIF_SINGLESTEP)) + privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP; + else + privcntl = SPU_PRIVCNTL_MODE_NORMAL; + + ctx->ops->privcntl_write(ctx, privcntl); + ctx->ops->npc_write(ctx, *npc); + } + + ctx->ops->runcntl_write(ctx, runcntl); + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } else { + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + return ret; + } else { + spuctx_switch_state(ctx, SPU_UTIL_USER); + } + } + + set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + return 0; +} + +static int spu_run_fini(struct spu_context *ctx, u32 *npc, + u32 *status) +{ + int ret = 0; + + spu_del_from_rq(ctx); + + *status = ctx->ops->status_read(ctx); + *npc = ctx->ops->npc_read(ctx); + + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags); + spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status); + spu_release(ctx); + + if (signal_pending(current)) + ret = -ERESTARTSYS; + + return ret; +} + +/* + * SPU syscall restarting is tricky because we violate the basic + * assumption that the signal handler is running on the interrupted + * thread. Here instead, the handler runs on PowerPC user space code, + * while the syscall was called from the SPU. + * This means we can only do a very rough approximation of POSIX + * signal semantics. + */ +static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, + unsigned int *npc) +{ + int ret; + + switch (*spu_ret) { + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* + * Enter the regular syscall restarting for + * sys_spu_run, then restart the SPU syscall + * callback. + */ + *npc -= 8; + ret = -ERESTARTSYS; + break; + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * Restart block is too hard for now, just return -EINTR + * to the SPU. + * ERESTARTNOHAND comes from sys_pause, we also return + * -EINTR from there. + * Assume that we need to be restarted ourselves though. + */ + *spu_ret = -EINTR; + ret = -ERESTARTSYS; + break; + default: + printk(KERN_WARNING "%s: unexpected return code %ld\n", + __func__, *spu_ret); + ret = 0; + } + return ret; +} + +static int spu_process_callback(struct spu_context *ctx) +{ + struct spu_syscall_block s; + u32 ls_pointer, npc; + void __iomem *ls; + long spu_ret; + int ret; + + /* get syscall block from local store */ + npc = ctx->ops->npc_read(ctx) & ~3; + ls = (void __iomem *)ctx->ops->get_ls(ctx); + ls_pointer = in_be32(ls + npc); + if (ls_pointer > (LS_SIZE - sizeof(s))) + return -EFAULT; + memcpy_fromio(&s, ls + ls_pointer, sizeof(s)); + + /* do actual syscall without pinning the spu */ + ret = 0; + spu_ret = -ENOSYS; + npc += 4; + + if (s.nr_ret < NR_syscalls) { + spu_release(ctx); + /* do actual system call from here */ + spu_ret = spu_sys_callback(&s); + if (spu_ret <= -ERESTARTSYS) { + ret = spu_handle_restartsys(ctx, &spu_ret, &npc); + } + mutex_lock(&ctx->state_mutex); + if (ret == -ERESTARTSYS) + return ret; + } + + /* need to re-get the ls, as it may have changed when we released the + * spu */ + ls = (void __iomem *)ctx->ops->get_ls(ctx); + + /* write result, jump over indirect pointer */ + memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret)); + ctx->ops->npc_write(ctx, npc); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + return ret; +} + +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) +{ + int ret; + u32 status; + + if (mutex_lock_interruptible(&ctx->run_mutex)) + return -ERESTARTSYS; + + ctx->event_return = 0; + + ret = spu_acquire(ctx); + if (ret) + goto out_unlock; + + spu_enable_spu(ctx); + + spu_update_sched_info(ctx); + + ret = spu_run_init(ctx, npc); + if (ret) { + spu_release(ctx); + goto out; + } + + do { + ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); + if (unlikely(ret)) { + /* + * This is nasty: we need the state_mutex for all the + * bookkeeping even if the syscall was interrupted by + * a signal. ewww. + */ + mutex_lock(&ctx->state_mutex); + break; + } + if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags))) { + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) + continue; + } + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { + ret = spu_process_callback(ctx); + if (ret) + break; + status &= ~SPU_STATUS_STOPPED_BY_STOP; + } + ret = spufs_handle_class1(ctx); + if (ret) + break; + + ret = spufs_handle_class0(ctx); + if (ret) + break; + + if (signal_pending(current)) + ret = -ERESTARTSYS; + } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_SINGLE_STEP))); + + spu_disable_spu(ctx); + ret = spu_run_fini(ctx, npc, &status); + spu_yield(ctx); + + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100)) + ctx->stats.libassist++; + + if ((ret == 0) || + ((ret == -ERESTARTSYS) && + ((status & SPU_STATUS_STOPPED_BY_HALT) || + (status & SPU_STATUS_SINGLE_STEP) || + ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) + ret = status; + + /* Note: we don't need to force_sig SIGTRAP on single-step + * since we have TIF_SINGLESTEP set, thus the kernel will do + * it upon return from the syscall anyway. + */ + if (unlikely(status & SPU_STATUS_SINGLE_STEP)) + ret = -ERESTARTSYS; + + else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP) + && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) { + force_sig(SIGTRAP); + ret = -ERESTARTSYS; + } + +out: + *event = ctx->event_return; +out_unlock: + mutex_unlock(&ctx->run_mutex); + return ret; +} diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c new file mode 100644 index 0000000000..99bd027a7f --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -0,0 +1,1141 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* sched.c - SPU scheduler. + * + * Copyright (C) IBM 2005 + * Author: Mark Nutter + * + * 2006-03-31 NUMA domains added. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "spufs.h" +#define CREATE_TRACE_POINTS +#include "sputrace.h" + +struct spu_prio_array { + DECLARE_BITMAP(bitmap, MAX_PRIO); + struct list_head runq[MAX_PRIO]; + spinlock_t runq_lock; + int nr_waiting; +}; + +static unsigned long spu_avenrun[3]; +static struct spu_prio_array *spu_prio; +static struct task_struct *spusched_task; +static struct timer_list spusched_timer; +static struct timer_list spuloadavg_timer; + +/* + * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). + */ +#define NORMAL_PRIO 120 + +/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. + */ +#define SPUSCHED_TICK (10) + +/* + * These are the 'tuning knobs' of the scheduler: + * + * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is + * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs. + */ +#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) +#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) + +#define SCALE_PRIO(x, prio) \ + max(x * (MAX_PRIO - prio) / (NICE_WIDTH / 2), MIN_SPU_TIMESLICE) + +/* + * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: + * [800ms ... 100ms ... 5ms] + * + * The higher a thread's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority thread gets MIN_TIMESLICE worth of execution time. + */ +void spu_set_timeslice(struct spu_context *ctx) +{ + if (ctx->prio < NORMAL_PRIO) + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio); + else + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio); +} + +/* + * Update scheduling information from the owning thread. + */ +void __spu_update_sched_info(struct spu_context *ctx) +{ + /* + * assert that the context is not on the runqueue, so it is safe + * to change its scheduling parameters. + */ + BUG_ON(!list_empty(&ctx->rq)); + + /* + * 32-Bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * per definition racy. + */ + ctx->tid = current->pid; + + /* + * We do our own priority calculations, so we normally want + * ->static_prio to start with. Unfortunately this field + * contains junk for threads with a realtime scheduling + * policy so we have to look at ->prio in this case. + */ + if (rt_prio(current->prio)) + ctx->prio = current->prio; + else + ctx->prio = current->static_prio; + ctx->policy = current->policy; + + /* + * TO DO: the context may be loaded, so we may need to activate + * it again on a different node. But it shouldn't hurt anything + * to update its parameters, because we know that the scheduler + * is not actively looking at this field, since it is not on the + * runqueue. The context will be rescheduled on the proper node + * if it is timesliced or preempted. + */ + cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr); + + /* Save the current cpu id for spu interrupt routing. */ + ctx->last_ran = raw_smp_processor_id(); +} + +void spu_update_sched_info(struct spu_context *ctx) +{ + int node; + + if (ctx->state == SPU_STATE_RUNNABLE) { + node = ctx->spu->node; + + /* + * Take list_mutex to sync with find_victim(). + */ + mutex_lock(&cbe_spu_info[node].list_mutex); + __spu_update_sched_info(ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); + } else { + __spu_update_sched_info(ctx); + } +} + +static int __node_allowed(struct spu_context *ctx, int node) +{ + if (nr_cpus_node(node)) { + const struct cpumask *mask = cpumask_of_node(node); + + if (cpumask_intersects(mask, &ctx->cpus_allowed)) + return 1; + } + + return 0; +} + +static int node_allowed(struct spu_context *ctx, int node) +{ + int rval; + + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); + + return rval; +} + +void do_notify_spus_active(void) +{ + int node; + + /* + * Wake up the active spu_contexts. + */ + for_each_online_node(node) { + struct spu *spu; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } +} + +/** + * spu_bind_context - bind spu context to physical spu + * @spu: physical spu to bind to + * @ctx: context to bind + */ +static void spu_bind_context(struct spu *spu, struct spu_context *ctx) +{ + spu_context_trace(spu_bind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (ctx->flags & SPU_CREATE_NOSCHED) + atomic_inc(&cbe_spu_info[spu->node].reserved_spus); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + + spu_associate_mm(spu, ctx->owner); + + spin_lock_irq(&spu->register_lock); + spu->ctx = ctx; + spu->flags = 0; + ctx->spu = spu; + ctx->ops = &spu_hw_ops; + spu->pid = current->pid; + spu->tgid = current->tgid; + spu->ibox_callback = spufs_ibox_callback; + spu->wbox_callback = spufs_wbox_callback; + spu->stop_callback = spufs_stop_callback; + spu->mfc_callback = spufs_mfc_callback; + spin_unlock_irq(&spu->register_lock); + + spu_unmap_mappings(ctx); + + spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0); + spu_restore(&ctx->csa, spu); + spu->timestamp = jiffies; + ctx->state = SPU_STATE_RUNNABLE; + + spuctx_switch_state(ctx, SPU_UTIL_USER); +} + +/* + * Must be used with the list_mutex held. + */ +static inline int sched_spu(struct spu *spu) +{ + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); +} + +static void aff_merge_remaining_ctxs(struct spu_gang *gang) +{ + struct spu_context *ctx; + + list_for_each_entry(ctx, &gang->aff_list_head, aff_list) { + if (list_empty(&ctx->aff_list)) + list_add(&ctx->aff_list, &gang->aff_list_head); + } + gang->aff_flags |= AFF_MERGED; +} + +static void aff_set_offsets(struct spu_gang *gang) +{ + struct spu_context *ctx; + int offset; + + offset = -1; + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset--; + } + + offset = 0; + list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + ctx->aff_offset = offset++; + } + + gang->aff_flags |= AFF_OFFSETS_SET; +} + +static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) +{ + struct spu *spu; + int node, n; + + /* + * TODO: A better algorithm could be used to find a good spu to be + * used as reference location for the ctxs chain. + */ + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + /* + * "available_spus" counts how many spus are not potentially + * going to be used by other affinity gangs whose reference + * context is already in place. Although this code seeks to + * avoid having affinity gangs with a summed amount of + * contexts bigger than the amount of spus in the node, + * this may happen sporadically. In this case, available_spus + * becomes negative, which is harmless. + */ + int available_spus; + + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + available_spus = 0; + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset + && spu->ctx->gang->aff_ref_spu) + available_spus -= spu->ctx->gang->contexts; + available_spus++; + } + if (available_spus < ctx->gang->contexts) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + continue; + } + + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if ((!mem_aff || spu->has_mem_affinity) && + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); + return spu; + } + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + return NULL; +} + +static void aff_set_ref_point_location(struct spu_gang *gang) +{ + int mem_aff, gs, lowest_offset; + struct spu_context *tmp, *ctx; + + mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM; + lowest_offset = 0; + gs = 0; + + list_for_each_entry(tmp, &gang->aff_list_head, aff_list) + gs++; + + list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list, + aff_list) { + if (&ctx->aff_list == &gang->aff_list_head) + break; + lowest_offset = ctx->aff_offset; + } + + gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs, + lowest_offset); +} + +static struct spu *ctx_location(struct spu *ref, int offset, int node) +{ + struct spu *spu; + + spu = NULL; + if (offset >= 0) { + list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset--; + } + } else { + list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); + if (offset == 0) + break; + if (sched_spu(spu)) + offset++; + } + } + + return spu; +} + +/* + * affinity_check is called each time a context is going to be scheduled. + * It returns the spu ptr on which the context must run. + */ +static int has_affinity(struct spu_context *ctx) +{ + struct spu_gang *gang = ctx->gang; + + if (list_empty(&ctx->aff_list)) + return 0; + + if (atomic_read(&ctx->gang->aff_sched_count) == 0) + ctx->gang->aff_ref_spu = NULL; + + if (!gang->aff_ref_spu) { + if (!(gang->aff_flags & AFF_MERGED)) + aff_merge_remaining_ctxs(gang); + if (!(gang->aff_flags & AFF_OFFSETS_SET)) + aff_set_offsets(gang); + aff_set_ref_point_location(gang); + } + + return gang->aff_ref_spu != NULL; +} + +/** + * spu_unbind_context - unbind spu context from physical spu + * @spu: physical spu to unbind from + * @ctx: context to unbind + */ +static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) +{ + u32 status; + + spu_context_trace(spu_unbind_context__enter, ctx, spu); + + spuctx_switch_state(ctx, SPU_UTIL_SYSTEM); + + if (spu->ctx->flags & SPU_CREATE_NOSCHED) + atomic_dec(&cbe_spu_info[spu->node].reserved_spus); + + if (ctx->gang) + /* + * If ctx->gang->aff_sched_count is positive, SPU affinity is + * being considered in this gang. Using atomic_dec_if_positive + * allow us to skip an explicit check for affinity in this gang + */ + atomic_dec_if_positive(&ctx->gang->aff_sched_count); + + spu_unmap_mappings(ctx); + spu_save(&ctx->csa, spu); + spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0); + + spin_lock_irq(&spu->register_lock); + spu->timestamp = jiffies; + ctx->state = SPU_STATE_SAVED; + spu->ibox_callback = NULL; + spu->wbox_callback = NULL; + spu->stop_callback = NULL; + spu->mfc_callback = NULL; + spu->pid = 0; + spu->tgid = 0; + ctx->ops = &spu_backing_ops; + spu->flags = 0; + spu->ctx = NULL; + spin_unlock_irq(&spu->register_lock); + + spu_associate_mm(spu, NULL); + + ctx->stats.slb_flt += + (spu->stats.slb_flt - ctx->stats.slb_flt_base); + ctx->stats.class2_intr += + (spu->stats.class2_intr - ctx->stats.class2_intr_base); + + /* This maps the underlying spu state to idle */ + spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->spu = NULL; + + if (spu_stopped(ctx, &status)) + wake_up_all(&ctx->stop_wq); +} + +/** + * spu_add_to_rq - add a context to the runqueue + * @ctx: context to add + */ +static void __spu_add_to_rq(struct spu_context *ctx) +{ + /* + * Unfortunately this code path can be called from multiple threads + * on behalf of a single context due to the way the problem state + * mmap support works. + * + * Fortunately we need to wake up all these threads at the same time + * and can simply skip the runqueue addition for every but the first + * thread getting into this codepath. + * + * It's still quite hacky, and long-term we should proxy all other + * threads through the owner thread so that spu_run is in control + * of all the scheduling activity for a given context. + */ + if (list_empty(&ctx->rq)) { + list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); + set_bit(ctx->prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + } +} + +static void spu_add_to_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + +static void __spu_del_from_rq(struct spu_context *ctx) +{ + int prio = ctx->prio; + + if (!list_empty(&ctx->rq)) { + if (!--spu_prio->nr_waiting) + del_timer(&spusched_timer); + list_del_init(&ctx->rq); + + if (list_empty(&spu_prio->runq[prio])) + clear_bit(prio, spu_prio->bitmap); + } +} + +void spu_del_from_rq(struct spu_context *ctx) +{ + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + spin_unlock(&spu_prio->runq_lock); +} + +static void spu_prio_wait(struct spu_context *ctx) +{ + DEFINE_WAIT(wait); + + /* + * The caller must explicitly wait for a context to be loaded + * if the nosched flag is set. If NOSCHED is not set, the caller + * queues the context and waits for an spu event or error. + */ + BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED)); + + spin_lock(&spu_prio->runq_lock); + prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE); + if (!signal_pending(current)) { + __spu_add_to_rq(ctx); + spin_unlock(&spu_prio->runq_lock); + mutex_unlock(&ctx->state_mutex); + schedule(); + mutex_lock(&ctx->state_mutex); + spin_lock(&spu_prio->runq_lock); + __spu_del_from_rq(ctx); + } + spin_unlock(&spu_prio->runq_lock); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&ctx->stop_wq, &wait); +} + +static struct spu *spu_get_idle(struct spu_context *ctx) +{ + struct spu *spu, *aff_ref_spu; + int node, n; + + spu_context_nospu_trace(spu_get_idle__enter, ctx); + + if (ctx->gang) { + mutex_lock(&ctx->gang->aff_mutex); + if (has_affinity(ctx)) { + aff_ref_spu = ctx->gang->aff_ref_spu; + atomic_inc(&ctx->gang->aff_sched_count); + mutex_unlock(&ctx->gang->aff_mutex); + node = aff_ref_spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + + atomic_dec(&ctx->gang->aff_sched_count); + goto not_found; + } + mutex_unlock(&ctx->gang->aff_mutex); + } + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + + not_found: + spu_context_nospu_trace(spu_get_idle__not_found, ctx); + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_context_trace(spu_get_idle__found, ctx, spu); + spu_init_channels(spu); + return spu; +} + +/** + * find_victim - find a lower priority context to preempt + * @ctx: candidate context for running + * + * Returns the freed physical spu to run the new context on. + */ +static struct spu *find_victim(struct spu_context *ctx) +{ + struct spu_context *victim = NULL; + struct spu *spu; + int node, n; + + spu_context_nospu_trace(spu_find_victim__enter, ctx); + + /* + * Look for a possible preemption candidate on the local node first. + * If there is no candidate look at the other nodes. This isn't + * exactly fair, but so far the whole spu scheduler tries to keep + * a strong node affinity. We might want to fine-tune this in + * the future. + */ + restart: + node = cpu_to_node(raw_smp_processor_id()); + for (n = 0; n < MAX_NUMNODES; n++, node++) { + node = (node < MAX_NUMNODES) ? node : 0; + if (!node_allowed(ctx, node)) + continue; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + struct spu_context *tmp = spu->ctx; + + if (tmp && tmp->prio > ctx->prio && + !(tmp->flags & SPU_CREATE_NOSCHED) && + (!victim || tmp->prio > victim->prio)) { + victim = spu->ctx; + } + } + if (victim) + get_spu_context(victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (victim) { + /* + * This nests ctx->state_mutex, but we always lock + * higher priority contexts before lower priority + * ones, so this is safe until we introduce + * priority inheritance schemes. + * + * XXX if the highest priority context is locked, + * this can loop a long time. Might be better to + * look at another context or give up after X retries. + */ + if (!mutex_trylock(&victim->state_mutex)) { + put_spu_context(victim); + victim = NULL; + goto restart; + } + + spu = victim->spu; + if (!spu || victim->prio <= ctx->prio) { + /* + * This race can happen because we've dropped + * the active list mutex. Not a problem, just + * restart the search. + */ + mutex_unlock(&victim->state_mutex); + put_spu_context(victim); + victim = NULL; + goto restart; + } + + spu_context_trace(__spu_deactivate__unload, ctx, spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + spu_unbind_context(spu, victim); + mutex_unlock(&cbe_spu_info[node].list_mutex); + + victim->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags)) + spu_add_to_rq(victim); + + mutex_unlock(&victim->state_mutex); + put_spu_context(victim); + + return spu; + } + } + + return NULL; +} + +static void __spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + int node = spu->node; + int success = 0; + + spu_set_timeslice(ctx); + + mutex_lock(&cbe_spu_info[node].list_mutex); + if (spu->ctx == NULL) { + spu_bind_context(spu, ctx); + cbe_spu_info[node].nr_active++; + spu->alloc_state = SPU_USED; + success = 1; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); + + if (success) + wake_up_all(&ctx->run_wq); + else + spu_add_to_rq(ctx); +} + +static void spu_schedule(struct spu *spu, struct spu_context *ctx) +{ + /* not a candidate for interruptible because it's called either + from the scheduler thread or from spu_deactivate */ + mutex_lock(&ctx->state_mutex); + if (ctx->state == SPU_STATE_SAVED) + __spu_schedule(spu, ctx); + spu_release(ctx); +} + +/** + * spu_unschedule - remove a context from a spu, and possibly release it. + * @spu: The SPU to unschedule from + * @ctx: The context currently scheduled on the SPU + * @free_spu Whether to free the SPU for other contexts + * + * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the + * SPU is made available for other contexts (ie, may be returned by + * spu_get_idle). If this is zero, the caller is expected to schedule another + * context to this spu. + * + * Should be called with ctx->state_mutex held. + */ +static void spu_unschedule(struct spu *spu, struct spu_context *ctx, + int free_spu) +{ + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + if (free_spu) + spu->alloc_state = SPU_FREE; + spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; + mutex_unlock(&cbe_spu_info[node].list_mutex); +} + +/** + * spu_activate - find a free spu for a context and execute it + * @ctx: spu context to schedule + * @flags: flags (currently ignored) + * + * Tries to find a free spu to run @ctx. If no free spu is available + * add the context to the runqueue so it gets woken up once an spu + * is available. + */ +int spu_activate(struct spu_context *ctx, unsigned long flags) +{ + struct spu *spu; + + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; + +spu_activate_top: + if (signal_pending(current)) + return -ERESTARTSYS; + + spu = spu_get_idle(ctx); + /* + * If this is a realtime thread we try to get it running by + * preempting a lower priority thread. + */ + if (!spu && rt_prio(ctx->prio)) + spu = find_victim(ctx); + if (spu) { + unsigned long runcntl; + + runcntl = ctx->ops->runcntl_read(ctx); + __spu_schedule(spu, ctx); + if (runcntl & SPU_RUNCNTL_RUNNABLE) + spuctx_switch_state(ctx, SPU_UTIL_USER); + + return 0; + } + + if (ctx->flags & SPU_CREATE_NOSCHED) { + spu_prio_wait(ctx); + goto spu_activate_top; + } + + spu_add_to_rq(ctx); + + return 0; +} + +/** + * grab_runnable_context - try to find a runnable context + * + * Remove the highest priority context on the runqueue and return it + * to the caller. Returns %NULL if no runnable context was found. + */ +static struct spu_context *grab_runnable_context(int prio, int node) +{ + struct spu_context *ctx; + int best; + + spin_lock(&spu_prio->runq_lock); + best = find_first_bit(spu_prio->bitmap, prio); + while (best < prio) { + struct list_head *rq = &spu_prio->runq[best]; + + list_for_each_entry(ctx, rq, rq) { + /* XXX(hch): check for affinity here as well */ + if (__node_allowed(ctx, node)) { + __spu_del_from_rq(ctx); + goto found; + } + } + best++; + } + ctx = NULL; + found: + spin_unlock(&spu_prio->runq_lock); + return ctx; +} + +static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) +{ + struct spu *spu = ctx->spu; + struct spu_context *new = NULL; + + if (spu) { + new = grab_runnable_context(max_prio, spu->node); + if (new || force) { + spu_unschedule(spu, ctx, new == NULL); + if (new) { + if (new->flags & SPU_CREATE_NOSCHED) + wake_up(&new->stop_wq); + else { + spu_release(ctx); + spu_schedule(spu, new); + /* this one can't easily be made + interruptible */ + mutex_lock(&ctx->state_mutex); + } + } + } + } + + return new != NULL; +} + +/** + * spu_deactivate - unbind a context from it's physical spu + * @ctx: spu context to unbind + * + * Unbind @ctx from the physical spu it is running on and schedule + * the highest priority context to run on the freed physical spu. + */ +void spu_deactivate(struct spu_context *ctx) +{ + spu_context_nospu_trace(spu_deactivate__enter, ctx); + __spu_deactivate(ctx, 1, MAX_PRIO); +} + +/** + * spu_yield - yield a physical spu if others are waiting + * @ctx: spu context to yield + * + * Check if there is a higher priority context waiting and if yes + * unbind @ctx from the physical spu and schedule the highest + * priority context to run on the freed physical spu instead. + */ +void spu_yield(struct spu_context *ctx) +{ + spu_context_nospu_trace(spu_yield__enter, ctx); + if (!(ctx->flags & SPU_CREATE_NOSCHED)) { + mutex_lock(&ctx->state_mutex); + __spu_deactivate(ctx, 0, MAX_PRIO); + mutex_unlock(&ctx->state_mutex); + } +} + +static noinline void spusched_tick(struct spu_context *ctx) +{ + struct spu_context *new = NULL; + struct spu *spu = NULL; + + if (spu_acquire(ctx)) + BUG(); /* a kernel thread never has signals pending */ + + if (ctx->state != SPU_STATE_RUNNABLE) + goto out; + if (ctx->flags & SPU_CREATE_NOSCHED) + goto out; + if (ctx->policy == SCHED_FIFO) + goto out; + + if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + goto out; + + spu = ctx->spu; + + spu_context_trace(spusched_tick__preempt, ctx, spu); + + new = grab_runnable_context(ctx->prio + 1, spu->node); + if (new) { + spu_unschedule(spu, ctx, 0); + if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) + spu_add_to_rq(ctx); + } else { + spu_context_nospu_trace(spusched_tick__newslice, ctx); + if (!ctx->time_slice) + ctx->time_slice++; + } +out: + spu_release(ctx); + + if (new) + spu_schedule(spu, new); +} + +/** + * count_active_contexts - count nr of active tasks + * + * Return the number of tasks currently running or waiting to run. + * + * Note that we don't take runq_lock / list_mutex here. Reading + * a single 32bit value is atomic on powerpc, and we don't care + * about memory ordering issues here. + */ +static unsigned long count_active_contexts(void) +{ + int nr_active = 0, node; + + for (node = 0; node < MAX_NUMNODES; node++) + nr_active += cbe_spu_info[node].nr_active; + nr_active += spu_prio->nr_waiting; + + return nr_active; +} + +/** + * spu_calc_load - update the avenrun load estimates. + * + * No locking against reading these values from userspace, as for + * the CPU loadavg code. + */ +static void spu_calc_load(void) +{ + unsigned long active_tasks; /* fixed-point */ + + active_tasks = count_active_contexts() * FIXED_1; + spu_avenrun[0] = calc_load(spu_avenrun[0], EXP_1, active_tasks); + spu_avenrun[1] = calc_load(spu_avenrun[1], EXP_5, active_tasks); + spu_avenrun[2] = calc_load(spu_avenrun[2], EXP_15, active_tasks); +} + +static void spusched_wake(struct timer_list *unused) +{ + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + wake_up_process(spusched_task); +} + +static void spuloadavg_wake(struct timer_list *unused) +{ + mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); + spu_calc_load(); +} + +static int spusched_thread(void *unused) +{ + struct spu *spu; + int node; + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + for (node = 0; node < MAX_NUMNODES; node++) { + struct mutex *mtx = &cbe_spu_info[node].list_mutex; + + mutex_lock(mtx); + list_for_each_entry(spu, &cbe_spu_info[node].spus, + cbe_list) { + struct spu_context *ctx = spu->ctx; + + if (ctx) { + get_spu_context(ctx); + mutex_unlock(mtx); + spusched_tick(ctx); + mutex_lock(mtx); + put_spu_context(ctx); + } + } + mutex_unlock(mtx); + } + } + + return 0; +} + +void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state) +{ + unsigned long long curtime; + signed long long delta; + struct spu *spu; + enum spu_utilization_state old_state; + int node; + + curtime = ktime_get_ns(); + delta = curtime - ctx->stats.tstamp; + + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + WARN_ON(delta < 0); + + spu = ctx->spu; + old_state = ctx->stats.util_state; + ctx->stats.util_state = new_state; + ctx->stats.tstamp = curtime; + + /* + * Update the physical SPU utilization statistics. + */ + if (spu) { + ctx->stats.times[old_state] += delta; + spu->stats.times[old_state] += delta; + spu->stats.util_state = new_state; + spu->stats.tstamp = curtime; + node = spu->node; + if (old_state == SPU_UTIL_USER) + atomic_dec(&cbe_spu_info[node].busy_spus); + if (new_state == SPU_UTIL_USER) + atomic_inc(&cbe_spu_info[node].busy_spus); + } +} + +#ifdef CONFIG_PROC_FS +static int show_spu_loadavg(struct seq_file *s, void *private) +{ + int a, b, c; + + a = spu_avenrun[0] + (FIXED_1/200); + b = spu_avenrun[1] + (FIXED_1/200); + c = spu_avenrun[2] + (FIXED_1/200); + + /* + * Note that last_pid doesn't really make much sense for the + * SPU loadavg (it even seems very odd on the CPU side...), + * but we include it here to have a 100% compatible interface. + */ + seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + count_active_contexts(), + atomic_read(&nr_spu_contexts), + idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); + return 0; +} +#endif + +int __init spu_sched_init(void) +{ + struct proc_dir_entry *entry; + int err = -ENOMEM, i; + + spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); + if (!spu_prio) + goto out; + + for (i = 0; i < MAX_PRIO; i++) { + INIT_LIST_HEAD(&spu_prio->runq[i]); + __clear_bit(i, spu_prio->bitmap); + } + spin_lock_init(&spu_prio->runq_lock); + + timer_setup(&spusched_timer, spusched_wake, 0); + timer_setup(&spuloadavg_timer, spuloadavg_wake, 0); + + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); + if (IS_ERR(spusched_task)) { + err = PTR_ERR(spusched_task); + goto out_free_spu_prio; + } + + mod_timer(&spuloadavg_timer, 0); + + entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg); + if (!entry) + goto out_stop_kthread; + + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", + SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); + return 0; + + out_stop_kthread: + kthread_stop(spusched_task); + out_free_spu_prio: + kfree(spu_prio); + out: + return err; +} + +void spu_sched_exit(void) +{ + struct spu *spu; + int node; + + remove_proc_entry("spu_loadavg", NULL); + + del_timer_sync(&spusched_timer); + del_timer_sync(&spuloadavg_timer); + kthread_stop(spusched_task); + + for (node = 0; node < MAX_NUMNODES; node++) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); + } + kfree(spu_prio); +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c new file mode 100644 index 0000000000..2cbb6efb2d --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * spu_restore.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context restore sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include +#include +#include "spu_utils.h" + +#define BR_INSTR 0x327fff80 /* br -4 */ +#define NOP_INSTR 0x40200000 /* nop */ +#define HEQ_INSTR 0x7b000000 /* heq $0, $0 */ +#define STOP_INSTR 0x00000000 /* stop 0x0 */ +#define ILLEGAL_INSTR 0x00800000 /* illegal instr */ +#define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */ + +static inline void fetch_regs_from_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)®s_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x40; /* GET */ + + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x44; /* GETL */ + + /* Restore, Step 4: + * Enqueue the GETL command (tag 0) to the MFC SPU command + * queue to transfer the upper 240 kb of LS from CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void restore_decr(void) +{ + unsigned int offset; + unsigned int decr_running; + unsigned int decr; + + /* Restore, Step 6(moved): + * If the LSCSA "decrementer running" flag is set + * then write the SPU_WrDec channel with the + * decrementer value from LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr_status); + decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; + if (decr_running) { + offset = LSCSA_QW_OFFSET(decr); + decr = regs_spill[offset].slot[0]; + spu_writech(SPU_WrDec, decr); + } +} + +static inline void write_ppu_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 11: + * Write the MFC_WrOut_MB channel with the PPU_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppu_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutMbox, data); +} + +static inline void write_ppuint_mb(void) +{ + unsigned int offset; + unsigned int data; + + /* Restore, Step 12: + * Write the MFC_WrInt_MB channel with the PPUINT_MB + * data from LSCSA. + */ + offset = LSCSA_QW_OFFSET(ppuint_mb); + data = regs_spill[offset].slot[0]; + spu_writech(SPU_WrOutIntrMbox, data); +} + +static inline void restore_fpcr(void) +{ + unsigned int offset; + vector unsigned int fpcr; + + /* Restore, Step 13: + * Restore the floating-point status and control + * register from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + fpcr = regs_spill[offset].v; + spu_mtfpscr(fpcr); +} + +static inline void restore_srr0(void) +{ + unsigned int offset; + unsigned int srr0; + + /* Restore, Step 14: + * Restore the SPU SRR0 data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + srr0 = regs_spill[offset].slot[0]; + spu_writech(SPU_WrSRR0, srr0); +} + +static inline void restore_event_mask(void) +{ + unsigned int offset; + unsigned int event_mask; + + /* Restore, Step 15: + * Restore the SPU_RdEventMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + event_mask = regs_spill[offset].slot[0]; + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void restore_tag_mask(void) +{ + unsigned int offset; + unsigned int tag_mask; + + /* Restore, Step 16: + * Restore the SPU_RdTagMsk data from the LSCSA. + */ + offset = LSCSA_QW_OFFSET(tag_mask); + tag_mask = regs_spill[offset].slot[0]; + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void restore_complete(void) +{ + extern void exit_fini(void); + unsigned int *exit_instrs = (unsigned int *)exit_fini; + unsigned int offset; + unsigned int stopped_status; + unsigned int stopped_code; + + /* Restore, Step 18: + * Issue a stop-and-signal instruction with + * "good context restore" signal value. + * + * Restore, Step 19: + * There may be additional instructions placed + * here by the PPE Sequence for SPU Context + * Restore in order to restore the correct + * "stopped state". + * + * This step is handled here by analyzing the + * LSCSA.stopped_status and then modifying the + * exit() function to behave appropriately. + */ + + offset = LSCSA_QW_OFFSET(stopped_status); + stopped_status = regs_spill[offset].slot[0]; + stopped_code = regs_spill[offset].slot[1]; + + switch (stopped_status) { + case SPU_STOPPED_STATUS_P_I: + /* SPU_Status[P,I]=1. Add illegal instruction + * followed by stop-and-signal instruction after + * end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_P_H: + /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed + * by stop-and-signal instruction after end of + * restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_S_P: + /* SPU_Status[S,P]=1. Add nop instruction + * followed by 'br -4' after end of restore + * code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S_I: + /* SPU_Status[S,I]=1. Add illegal instruction + * followed by 'br -4' after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_I: + /* SPU_Status[I]=1. Add illegal instruction followed + * by infinite loop after end of restore sequence. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = ILLEGAL_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_S: + /* SPU_Status[S]=1. Add two 'nop' instructions. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_H: + /* SPU_Status[H]=1. Add 'heq $0, $0' instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = HEQ_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + case SPU_STOPPED_STATUS_P: + /* SPU_Status[P]=1. Add stop-and-signal instruction + * after end of restore code. + */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = STOP_INSTR | stopped_code; + break; + case SPU_STOPPED_STATUS_R: + /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */ + exit_instrs[0] = RESTORE_COMPLETE; + exit_instrs[1] = NOP_INSTR; + exit_instrs[2] = NOP_INSTR; + exit_instrs[3] = BR_INSTR; + break; + default: + /* SPU_Status[R]=1. No additional instructions. */ + break; + } + spu_sync(); +} + +/** + * main - entry point for SPU-side context restore. + * + * This code deviates from the documented sequence in the + * following aspects: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. The register spill area is pulled by SPU + * into LS, rather than pushed by PPE. + * 3. All 128 registers are restored by exit(). + * 4. The exit() function is modified at run + * time in order to properly restore the + * SPU_Status register. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + fetch_regs_from_mem(lscsa_ea); + + set_event_mask(); /* Step 1. */ + set_tag_mask(); /* Step 2. */ + build_dma_list(lscsa_ea); /* Step 3. */ + restore_upper_240kb(lscsa_ea); /* Step 4. */ + /* Step 5: done by 'exit'. */ + enqueue_putllc(lscsa_ea); /* Step 7. */ + set_tag_update(); /* Step 8. */ + read_tag_status(); /* Step 9. */ + restore_decr(); /* moved Step 6. */ + read_llar_status(); /* Step 10. */ + write_ppu_mb(); /* Step 11. */ + write_ppuint_mb(); /* Step 12. */ + restore_fpcr(); /* Step 13. */ + restore_srr0(); /* Step 14. */ + restore_event_mask(); /* Step 15. */ + restore_tag_mask(); /* Step 16. */ + /* Step 17. done by 'exit'. */ + restore_complete(); /* Step 18. */ + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S new file mode 100644 index 0000000000..6d799f8476 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * crt0_r.S: Entry function for SPU-side context restore. + * + * Copyright (C) 2005 IBM + * + * Entry and exit function for SPU-side of the context restore + * sequence. Sets up an initial stack frame, then branches to + * 'main'. On return, restores all 128 registers from the LSCSA + * and exits. + */ + +#include + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + +.global exit +.global _exit +exit: +_exit: + /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */ + ila $3, regs_spill + 256 +restore_regs: + lqr $4, restore_reg_insts +restore_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +restore_reg_insts: /* must be quad-word aligned. */ + lqd $16, 0($3) + lqd $17, 16($3) + lqd $18, 32($3) + lqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, restore_reg_insts + ai $3, $3, 64 + brnz $5, restore_reg_loop + + /* SPU Context Restore Step 17: Restore the first 16 GPRs. */ + lqa $0, regs_spill + 0 + lqa $1, regs_spill + 16 + lqa $2, regs_spill + 32 + lqa $3, regs_spill + 48 + lqa $4, regs_spill + 64 + lqa $5, regs_spill + 80 + lqa $6, regs_spill + 96 + lqa $7, regs_spill + 112 + lqa $8, regs_spill + 128 + lqa $9, regs_spill + 144 + lqa $10, regs_spill + 160 + lqa $11, regs_spill + 176 + lqa $12, regs_spill + 192 + lqa $13, regs_spill + 208 + lqa $14, regs_spill + 224 + lqa $15, regs_spill + 240 + + /* Under normal circumstances, the 'exit' function + * terminates with 'stop SPU_RESTORE_COMPLETE', + * indicating that the SPU-side restore code has + * completed. + * + * However it is possible that instructions immediately + * following the 'stop 0x3ffc' have been modified at run + * time so as to recreate the exact SPU_Status settings + * from the application, e.g. illegal instruciton, halt, + * etc. + */ +.global exit_fini +.global _exit_fini +exit_fini: +_exit_fini: + stop SPU_RESTORE_COMPLETE + stop 0 + stop 0 + stop 0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped new file mode 100644 index 0000000000..f383b027e8 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped @@ -0,0 +1,935 @@ +/* + * spu_restore_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_restore.c. + * Do not edit! + */ +static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = { +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33001180, +0x42034003, +0x33800284, +0x1c010204, +0x40200000, +0x40200000, +0x40200000, +0x34000190, +0x34004191, +0x34008192, +0x3400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffa85, +0x3080b000, +0x3080b201, +0x3080b402, +0x3080b603, +0x3080b804, +0x3080ba05, +0x3080bc06, +0x3080be07, +0x3080c008, +0x3080c209, +0x3080c40a, +0x3080c60b, +0x3080c80c, +0x3080ca0d, +0x3080cc0e, +0x3080ce0f, +0x00003ffc, +0x00000000, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb0a14103, +0x01a00204, +0x3ec10083, +0x4202c002, +0xb0a14203, +0x21a00802, +0x3fbf028a, +0x3f20050a, +0x3fbe0502, +0x3fe30102, +0x21a00882, +0x3f82028b, +0x3fe3058b, +0x3fbf0584, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x04000203, +0x21a00903, +0x40848002, +0x21a00982, +0x40800003, +0x21a00a03, +0x40802002, +0x21a00a82, +0x21a00083, +0x40800082, +0x21a00b02, +0x10002612, +0x42a00003, +0x42074006, +0x1800c204, +0x40a00008, +0x40800789, +0x1c010305, +0x34000302, +0x1cffc489, +0x3ec00303, +0x3ec00287, +0xb0408403, +0x24000302, +0x34000282, +0x1c020306, +0xb0408207, +0x18020204, +0x24000282, +0x217ffa09, +0x04000402, +0x21a00802, +0x3fbe0504, +0x3fe30204, +0x21a00884, +0x42074002, +0x21a00902, +0x40803c03, +0x21a00983, +0x04000485, +0x21a00a05, +0x40802202, +0x21a00a82, +0x21a00805, +0x21a00884, +0x3fbf0582, +0x3f200102, +0x3fbe0102, +0x3fe30102, +0x21a00902, +0x40804003, +0x21a00983, +0x21a00a05, +0x40805a02, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x30809c03, +0x34000182, +0x14004102, +0x21002082, +0x01a00d82, +0x3080a003, +0x34000182, +0x21a00e02, +0x3080a203, +0x34000182, +0x21a00f02, +0x3080a403, +0x34000182, +0x77400100, +0x3080a603, +0x34000182, +0x21a00702, +0x3080a803, +0x34000182, +0x21a00082, +0x3080aa03, +0x34000182, +0x21a00b02, +0x4020007f, +0x3080ae02, +0x42004805, +0x3080ac04, +0x34000103, +0x34000202, +0x1cffc183, +0x3b810106, +0x0f608184, +0x42013802, +0x5c020183, +0x38810102, +0x3b810102, +0x21000e83, +0x4020007f, +0x35000100, +0x00000470, +0x000002f8, +0x00000430, +0x00000360, +0x000002f8, +0x000003c8, +0x000004a8, +0x00000298, +0x00000360, +0x00200000, +0x409ffe02, +0x30801203, +0x40800208, +0x3ec40084, +0x40800407, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38820282, +0x41004003, +0xb0408189, +0x28820282, +0x3881c282, +0xb0408304, +0x2881c282, +0x00400000, +0x40800003, +0x35000000, +0x30809e03, +0x34000182, +0x21a00382, +0x4020007f, +0x327fde00, +0x409ffe02, +0x30801203, +0x40800206, +0x3ec40084, +0x40800407, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x38818282, +0x41004003, +0xb040818a, +0x10005b0b, +0x41201003, +0x28818282, +0x3881c282, +0xb0408184, +0x41193f83, +0x60ffc003, +0x2881c282, +0x38820282, +0xb0408189, +0x28820282, +0x327fef80, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40086, +0x4120100b, +0x10005b14, +0x40800404, +0x3ac1c289, +0x40800608, +0xb060c106, +0x3ac10286, +0x3ac2028a, +0x20801203, +0x3881c282, +0x41193f83, +0x60ffc003, +0xb0408589, +0x2881c282, +0x38810282, +0xb0408586, +0x28810282, +0x38820282, +0xb040818a, +0x28820282, +0x4020007f, +0x327fe280, +0x409ffe02, +0x30801203, +0x40800207, +0x3ec40084, +0x40800408, +0x10005b14, +0x40800609, +0x3ac1c28a, +0x3ac2028b, +0xb060c104, +0x3ac24284, +0x20801203, +0x41201003, +0x3881c282, +0xb040830a, +0x2881c282, +0x38820282, +0xb040818b, +0x41193f83, +0x60ffc003, +0x28820282, +0x38824282, +0xb0408184, +0x28824282, +0x4020007f, +0x327fd580, +0x409ffe02, +0x1000658e, +0x40800206, +0x30801203, +0x40800407, +0x3ec40084, +0x40800608, +0x3ac1828a, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38818282, +0x4020007f, +0x327fd800, +0x409ffe03, +0x30801202, +0x40800207, +0x3ec40084, +0x10005b09, +0x3ac1c288, +0xb0408184, +0x4020007f, +0x4020007f, +0x20801202, +0x3881c282, +0xb0408308, +0x2881c282, +0x327fc680, +0x409ffe02, +0x1000588b, +0x40800208, +0x30801203, +0x40800407, +0x3ec40084, +0x3ac20289, +0xb060c104, +0x3ac1c284, +0x20801203, +0x413d8003, +0x38820282, +0x327fbd80, +0x00200000, +0x00000da0, +0x00000000, +0x00000000, +0x00000000, +0x00000d90, +0x00000000, +0x00000000, +0x00000000, +0x00000db0, +0x00000000, +0x00000000, +0x00000000, +0x00000dc0, +0x00000000, +0x00000000, +0x00000000, +0x00000d80, +0x00000000, +0x00000000, +0x00000000, +0x00000df0, +0x00000000, +0x00000000, +0x00000000, +0x00000de0, +0x00000000, +0x00000000, +0x00000000, +0x00000dd0, +0x00000000, +0x00000000, +0x00000000, +0x00000e04, +0x00000000, +0x00000000, +0x00000000, +0x00000e00, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c new file mode 100644 index 0000000000..28c88e3243 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * spu_save.c + * + * (C) Copyright IBM Corp. 2005 + * + * SPU-side context save sequence outlined in + * Synergistic Processor Element Book IV + * + * Author: Mark Nutter + */ + + +#ifndef LS_SIZE +#define LS_SIZE 0x40000 /* 256K (in bytes) */ +#endif + +typedef unsigned int u32; +typedef unsigned long long u64; + +#include +#include +#include "spu_utils.h" + +static inline void save_event_mask(void) +{ + unsigned int offset; + + /* Save, Step 2: + * Read the SPU_RdEventMsk channel and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(event_mask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask); +} + +static inline void save_tag_mask(void) +{ + unsigned int offset; + + /* Save, Step 3: + * Read the SPU_RdTagMsk channel and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(tag_mask); + regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask); +} + +static inline void save_upper_240kb(addr64 lscsa_ea) +{ + unsigned int ls = 16384; + unsigned int list = (unsigned int)&dma_list[0]; + unsigned int size = sizeof(dma_list); + unsigned int tag_id = 0; + unsigned int cmd = 0x24; /* PUTL */ + + /* Save, Step 7: + * Enqueue the PUTL command (tag 0) to the MFC SPU command + * queue to transfer the remaining 240 kb of LS to CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, list); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_fpcr(void) +{ + // vector unsigned int fpcr; + unsigned int offset; + + /* Save, Step 9: + * Issue the floating-point status and control register + * read instruction, and save to the LSCSA. + */ + offset = LSCSA_QW_OFFSET(fpcr); + regs_spill[offset].v = spu_mffpscr(); +} + +static inline void save_decr(void) +{ + unsigned int offset; + + /* Save, Step 10: + * Read and save the SPU_RdDec channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(decr); + regs_spill[offset].slot[0] = spu_readch(SPU_RdDec); +} + +static inline void save_srr0(void) +{ + unsigned int offset; + + /* Save, Step 11: + * Read and save the SPU_WSRR0 channel data to + * the LSCSA. + */ + offset = LSCSA_QW_OFFSET(srr0); + regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0); +} + +static inline void spill_regs_to_mem(addr64 lscsa_ea) +{ + unsigned int ls = (unsigned int)®s_spill[0]; + unsigned int size = sizeof(regs_spill); + unsigned int tag_id = 0; + unsigned int cmd = 0x20; /* PUT */ + + /* Save, Step 13: + * Enqueue a PUT command (tag 0) to send the LSCSA + * to the CSA. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void enqueue_sync(addr64 lscsa_ea) +{ + unsigned int tag_id = 0; + unsigned int cmd = 0xCC; + + /* Save, Step 14: + * Enqueue an MFC_SYNC command (tag 0). + */ + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void save_complete(void) +{ + /* Save, Step 18: + * Issue a stop-and-signal instruction indicating + * "save complete". Note: This function will not + * return!! + */ + spu_stop(SPU_SAVE_COMPLETE); +} + +/** + * main - entry point for SPU-side context save. + * + * This code deviates from the documented sequence as follows: + * + * 1. The EA for LSCSA is passed from PPE in the + * signal notification channels. + * 2. All 128 registers are saved by crt0.o. + */ +int main() +{ + addr64 lscsa_ea; + + lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1); + lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2); + + /* Step 1: done by exit(). */ + save_event_mask(); /* Step 2. */ + save_tag_mask(); /* Step 3. */ + set_event_mask(); /* Step 4. */ + set_tag_mask(); /* Step 5. */ + build_dma_list(lscsa_ea); /* Step 6. */ + save_upper_240kb(lscsa_ea); /* Step 7. */ + /* Step 8: done by exit(). */ + save_fpcr(); /* Step 9. */ + save_decr(); /* Step 10. */ + save_srr0(); /* Step 11. */ + enqueue_putllc(lscsa_ea); /* Step 12. */ + spill_regs_to_mem(lscsa_ea); /* Step 13. */ + enqueue_sync(lscsa_ea); /* Step 14. */ + set_tag_update(); /* Step 15. */ + read_tag_status(); /* Step 16. */ + read_llar_status(); /* Step 17. */ + save_complete(); /* Step 18. */ + + return 0; +} diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S new file mode 100644 index 0000000000..5ce32efdca --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * crt0_s.S: Entry function for SPU-side context save. + * + * Copyright (C) 2005 IBM + * + * Entry function for SPU-side of the context save sequence. + * Saves all 128 GPRs, sets up an initial stack frame, then + * branches to 'main'. + */ + +#include + +.data +.align 7 +.globl regs_spill +regs_spill: +.space SIZEOF_SPU_SPILL_REGS, 0x0 + +.text +.global _start +_start: + /* SPU Context Save Step 1: Save the first 16 GPRs. */ + stqa $0, regs_spill + 0 + stqa $1, regs_spill + 16 + stqa $2, regs_spill + 32 + stqa $3, regs_spill + 48 + stqa $4, regs_spill + 64 + stqa $5, regs_spill + 80 + stqa $6, regs_spill + 96 + stqa $7, regs_spill + 112 + stqa $8, regs_spill + 128 + stqa $9, regs_spill + 144 + stqa $10, regs_spill + 160 + stqa $11, regs_spill + 176 + stqa $12, regs_spill + 192 + stqa $13, regs_spill + 208 + stqa $14, regs_spill + 224 + stqa $15, regs_spill + 240 + + /* SPU Context Save, Step 8: Save the remaining 112 GPRs. */ + ila $3, regs_spill + 256 +save_regs: + lqr $4, save_reg_insts +save_reg_loop: + ai $4, $4, 4 + .balignl 16, 0x40200000 +save_reg_insts: /* must be quad-word aligned. */ + stqd $16, 0($3) + stqd $17, 16($3) + stqd $18, 32($3) + stqd $19, 48($3) + andi $5, $4, 0x7F + stqr $4, save_reg_insts + ai $3, $3, 64 + brnz $5, save_reg_loop + + /* Initialize the stack pointer to point to 16368 + * (16kb-16). The back chain pointer is initialized + * to NULL. + */ + il $0, 0 + il $SP, 16368 + stqd $0, 0($SP) + + /* Allocate a minimum stack frame for the called main. + * This is needed so that main has a place to save the + * link register when it calls another function. + */ + stqd $SP, -160($SP) + ai $SP, $SP, -160 + + /* Call the program's main function. */ + brsl $0, main + + /* In this case main should not return; if it does + * there has been an error in the sequence. Execute + * stop-and-signal with code=0. + */ +.global exit +.global _exit +exit: +_exit: + stop 0x0 + + /* Pad the size of this crt0.o to be multiple of 16 bytes. */ +.balignl 16, 0x0 + diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped new file mode 100644 index 0000000000..b9f81ac8a6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped @@ -0,0 +1,743 @@ +/* + * spu_save_dump.h: Copyright (C) 2005 IBM. + * Hex-dump auto generated from spu_save.c. + * Do not edit! + */ +static unsigned int spu_save_code[] __attribute__((__aligned__(128))) = { +0x20805000, +0x20805201, +0x20805402, +0x20805603, +0x20805804, +0x20805a05, +0x20805c06, +0x20805e07, +0x20806008, +0x20806209, +0x2080640a, +0x2080660b, +0x2080680c, +0x20806a0d, +0x20806c0e, +0x20806e0f, +0x4201c003, +0x33800184, +0x1c010204, +0x40200000, +0x24000190, +0x24004191, +0x24008192, +0x2400c193, +0x141fc205, +0x23fffd84, +0x1c100183, +0x217ffb85, +0x40800000, +0x409ff801, +0x24000080, +0x24fd8081, +0x1cd80081, +0x33000180, +0x00000000, +0x00000000, +0x01a00182, +0x3ec00083, +0xb1c38103, +0x01a00204, +0x3ec10082, +0x4201400d, +0xb1c38202, +0x01a00583, +0x34218682, +0x3ed80684, +0xb0408184, +0x24218682, +0x01a00603, +0x00200000, +0x34214682, +0x3ed40684, +0xb0408184, +0x40800003, +0x24214682, +0x21a00083, +0x40800082, +0x21a00b02, +0x4020007f, +0x1000251e, +0x42a00002, +0x32800008, +0x4205c00c, +0x00200000, +0x40a0000b, +0x3f82070f, +0x4080020a, +0x40800709, +0x3fe3078f, +0x3fbf0783, +0x3f200183, +0x3fbe0183, +0x3fe30187, +0x18008387, +0x4205c002, +0x3ac30404, +0x1cffc489, +0x00200000, +0x18008403, +0x38830402, +0x4cffc486, +0x3ac28185, +0xb0408584, +0x28830402, +0x1c020408, +0x38828182, +0xb0408385, +0x1802c387, +0x28828182, +0x217ff886, +0x04000582, +0x32800007, +0x21a00802, +0x3fbf0705, +0x3f200285, +0x3fbe0285, +0x3fe30285, +0x21a00885, +0x04000603, +0x21a00903, +0x40803c02, +0x21a00982, +0x04000386, +0x21a00a06, +0x40801202, +0x21a00a82, +0x73000003, +0x24200683, +0x01a00404, +0x00200000, +0x34204682, +0x3ec40683, +0xb0408203, +0x24204682, +0x01a00783, +0x00200000, +0x3421c682, +0x3edc0684, +0xb0408184, +0x2421c682, +0x21a00806, +0x21a00885, +0x3fbf0784, +0x3f200204, +0x3fbe0204, +0x3fe30204, +0x21a00904, +0x40804002, +0x21a00982, +0x21a00a06, +0x40805a02, +0x21a00a82, +0x04000683, +0x21a00803, +0x21a00885, +0x21a00904, +0x40848002, +0x21a00982, +0x21a00a06, +0x40801002, +0x21a00a82, +0x21a00a06, +0x40806602, +0x00200000, +0x35800009, +0x21a00a82, +0x40800083, +0x21a00b83, +0x01a00c02, +0x01a00d83, +0x00003ffb, +0x40800003, +0x4020007f, +0x35000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +0x00000000, +}; diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h new file mode 100644 index 0000000000..4fc1ebb45e --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * utils.h: Utilities for SPU-side of the context switch operation. + * + * (C) Copyright IBM 2005 + */ + +#ifndef _SPU_CONTEXT_UTILS_H_ +#define _SPU_CONTEXT_UTILS_H_ + +/* + * 64-bit safe EA. + */ +typedef union { + unsigned long long ull; + unsigned int ui[2]; +} addr64; + +/* + * 128-bit register template. + */ +typedef union { + unsigned int slot[4]; + vector unsigned int v; +} spu_reg128v; + +/* + * DMA list structure. + */ +struct dma_list_elem { + unsigned int size; + unsigned int ea_low; +}; + +/* + * Declare storage for 8-byte aligned DMA list. + */ +struct dma_list_elem dma_list[15] __attribute__ ((aligned(8))); + +/* + * External definition for storage + * declared in crt0. + */ +extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS]; + +/* + * Compute LSCSA byte offset for a given field. + */ +static struct spu_lscsa *dummy = (struct spu_lscsa *)0; +#define LSCSA_BYTE_OFFSET(_field) \ + ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0]))) +#define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4) + +static inline void set_event_mask(void) +{ + unsigned int event_mask = 0; + + /* Save, Step 4: + * Restore, Step 1: + * Set the SPU_RdEventMsk channel to zero to mask + * all events. + */ + spu_writech(SPU_WrEventMask, event_mask); +} + +static inline void set_tag_mask(void) +{ + unsigned int tag_mask = 1; + + /* Save, Step 5: + * Restore, Step 2: + * Set the SPU_WrTagMsk channel to '01' to unmask + * only tag group 0. + */ + spu_writech(MFC_WrTagMask, tag_mask); +} + +static inline void build_dma_list(addr64 lscsa_ea) +{ + unsigned int ea_low; + int i; + + /* Save, Step 6: + * Restore, Step 3: + * Update the effective address for the CSA in the + * pre-canned DMA-list in local storage. + */ + ea_low = lscsa_ea.ui[1]; + ea_low += LSCSA_BYTE_OFFSET(ls[16384]); + + for (i = 0; i < 15; i++, ea_low += 16384) { + dma_list[i].size = 16384; + dma_list[i].ea_low = ea_low; + } +} + +static inline void enqueue_putllc(addr64 lscsa_ea) +{ + unsigned int ls = 0; + unsigned int size = 128; + unsigned int tag_id = 0; + unsigned int cmd = 0xB4; /* PUTLLC */ + + /* Save, Step 12: + * Restore, Step 7: + * Send a PUTLLC (tag 0) command to the MFC using + * an effective address in the CSA in order to + * remove any possible lock-line reservation. + */ + spu_writech(MFC_LSA, ls); + spu_writech(MFC_EAH, lscsa_ea.ui[0]); + spu_writech(MFC_EAL, lscsa_ea.ui[1]); + spu_writech(MFC_Size, size); + spu_writech(MFC_TagID, tag_id); + spu_writech(MFC_Cmd, cmd); +} + +static inline void set_tag_update(void) +{ + unsigned int update_any = 1; + + /* Save, Step 15: + * Restore, Step 8: + * Write the MFC_TagUpdate channel with '01'. + */ + spu_writech(MFC_WrTagUpdate, update_any); +} + +static inline void read_tag_status(void) +{ + /* Save, Step 16: + * Restore, Step 9: + * Read the MFC_TagStat channel data. + */ + spu_readch(MFC_RdTagStat); +} + +static inline void read_llar_status(void) +{ + /* Save, Step 17: + * Restore, Step 10: + * Read the MFC_AtomicStat channel data. + */ + spu_readch(MFC_RdAtomicStat); +} + +#endif /* _SPU_CONTEXT_UTILS_H_ */ diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h new file mode 100644 index 0000000000..84958487f6 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SPU file system + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann + */ +#ifndef SPUFS_H +#define SPUFS_H + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define SPUFS_PS_MAP_SIZE 0x20000 +#define SPUFS_MFC_MAP_SIZE 0x1000 +#define SPUFS_CNTL_MAP_SIZE 0x1000 +#define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE +#define SPUFS_MSS_MAP_SIZE 0x1000 + +/* The magic number for our file system */ +enum { + SPUFS_MAGIC = 0x23c9b64e, +}; + +struct spu_context_ops; +struct spu_gang; + +/* ctx->sched_flags */ +enum { + SPU_SCHED_NOTIFY_ACTIVE, + SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */ + SPU_SCHED_SPU_RUN, /* context is within spu_run */ +}; + +enum { + SWITCH_LOG_BUFSIZE = 4096, +}; + +enum { + SWITCH_LOG_START, + SWITCH_LOG_STOP, + SWITCH_LOG_EXIT, +}; + +struct switch_log { + wait_queue_head_t wait; + unsigned long head; + unsigned long tail; + struct switch_log_entry { + struct timespec64 tstamp; + s32 spu_id; + u32 type; + u32 val; + u64 timebase; + } log[]; +}; + +struct spu_context { + struct spu *spu; /* pointer to a physical SPU */ + struct spu_state csa; /* SPU context save area. */ + spinlock_t mmio_lock; /* protects mmio access */ + struct address_space *local_store; /* local store mapping. */ + struct address_space *mfc; /* 'mfc' area mappings. */ + struct address_space *cntl; /* 'control' area mappings. */ + struct address_space *signal1; /* 'signal1' area mappings. */ + struct address_space *signal2; /* 'signal2' area mappings. */ + struct address_space *mss; /* 'mss' area mappings. */ + struct address_space *psmap; /* 'psmap' area mappings. */ + struct mutex mapping_lock; + u64 object_id; /* user space pointer for GNU Debugger */ + + enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; + struct mutex state_mutex; + struct mutex run_mutex; + + struct mm_struct *owner; + + struct kref kref; + wait_queue_head_t ibox_wq; + wait_queue_head_t wbox_wq; + wait_queue_head_t stop_wq; + wait_queue_head_t mfc_wq; + wait_queue_head_t run_wq; + u32 tagwait; + struct spu_context_ops *ops; + struct work_struct reap_work; + unsigned long flags; + unsigned long event_return; + + struct list_head gang_list; + struct spu_gang *gang; + struct kref *prof_priv_kref; + void ( * prof_priv_release) (struct kref *kref); + + /* owner thread */ + pid_t tid; + + /* scheduler fields */ + struct list_head rq; + unsigned int time_slice; + unsigned long sched_flags; + cpumask_t cpus_allowed; + int policy; + int prio; + int last_ran; + + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spu_utilization_state util_state; + unsigned long long tstamp; /* time of last state switch */ + unsigned long long times[SPU_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; + + /* context switch log */ + struct switch_log *switch_log; + + struct list_head aff_list; + int aff_head; + int aff_offset; +}; + +struct spu_gang { + struct list_head list; + struct mutex mutex; + struct kref kref; + int contexts; + + struct spu_context *aff_ref_ctx; + struct list_head aff_list_head; + struct mutex aff_mutex; + int aff_flags; + struct spu *aff_ref_spu; + atomic_t aff_sched_count; +}; + +/* Flag bits for spu_gang aff_flags */ +#define AFF_OFFSETS_SET 1 +#define AFF_MERGED 2 + +struct mfc_dma_command { + int32_t pad; /* reserved */ + uint32_t lsa; /* local storage address */ + uint64_t ea; /* effective address */ + uint16_t size; /* transfer size */ + uint16_t tag; /* command tag */ + uint16_t class; /* class ID */ + uint16_t cmd; /* command opcode */ +}; + + +/* SPU context query/set operations. */ +struct spu_context_ops { + int (*mbox_read) (struct spu_context * ctx, u32 * data); + u32(*mbox_stat_read) (struct spu_context * ctx); + __poll_t (*mbox_stat_poll)(struct spu_context *ctx, __poll_t events); + int (*ibox_read) (struct spu_context * ctx, u32 * data); + int (*wbox_write) (struct spu_context * ctx, u32 data); + u32(*signal1_read) (struct spu_context * ctx); + void (*signal1_write) (struct spu_context * ctx, u32 data); + u32(*signal2_read) (struct spu_context * ctx); + void (*signal2_write) (struct spu_context * ctx, u32 data); + void (*signal1_type_set) (struct spu_context * ctx, u64 val); + u64(*signal1_type_get) (struct spu_context * ctx); + void (*signal2_type_set) (struct spu_context * ctx, u64 val); + u64(*signal2_type_get) (struct spu_context * ctx); + u32(*npc_read) (struct spu_context * ctx); + void (*npc_write) (struct spu_context * ctx, u32 data); + u32(*status_read) (struct spu_context * ctx); + char*(*get_ls) (struct spu_context * ctx); + void (*privcntl_write) (struct spu_context *ctx, u64 data); + u32 (*runcntl_read) (struct spu_context * ctx); + void (*runcntl_write) (struct spu_context * ctx, u32 data); + void (*runcntl_stop) (struct spu_context * ctx); + void (*master_start) (struct spu_context * ctx); + void (*master_stop) (struct spu_context * ctx); + int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); + u32 (*read_mfc_tagstatus)(struct spu_context * ctx); + u32 (*get_mfc_free_elements)(struct spu_context *ctx); + int (*send_mfc_command)(struct spu_context * ctx, + struct mfc_dma_command * cmd); + void (*dma_info_read) (struct spu_context * ctx, + struct spu_dma_info * info); + void (*proxydma_info_read) (struct spu_context * ctx, + struct spu_proxydma_info * info); + void (*restart_dma)(struct spu_context *ctx); +}; + +extern struct spu_context_ops spu_hw_ops; +extern struct spu_context_ops spu_backing_ops; + +struct spufs_inode_info { + struct spu_context *i_ctx; + struct spu_gang *i_gang; + struct inode vfs_inode; + int i_openers; +}; +#define SPUFS_I(inode) \ + container_of(inode, struct spufs_inode_info, vfs_inode) + +struct spufs_tree_descr { + const char *name; + const struct file_operations *ops; + umode_t mode; + size_t size; +}; + +extern const struct spufs_tree_descr spufs_dir_contents[]; +extern const struct spufs_tree_descr spufs_dir_nosched_contents[]; +extern const struct spufs_tree_descr spufs_dir_debug_contents[]; + +/* system call implementation */ +extern struct spufs_calls spufs_calls; +struct coredump_params; +long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); +long spufs_create(const struct path *nd, struct dentry *dentry, unsigned int flags, + umode_t mode, struct file *filp); +/* ELF coredump callbacks for writing SPU ELF notes */ +extern int spufs_coredump_extra_notes_size(void); +extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm); + +extern const struct file_operations spufs_context_fops; + +/* gang management */ +struct spu_gang *alloc_spu_gang(void); +struct spu_gang *get_spu_gang(struct spu_gang *gang); +int put_spu_gang(struct spu_gang *gang); +void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx); +void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); + +/* fault handling */ +int spufs_handle_class1(struct spu_context *ctx); +int spufs_handle_class0(struct spu_context *ctx); + +/* affinity */ +struct spu *affinity_check(struct spu_context *ctx); + +/* context management */ +extern atomic_t nr_spu_contexts; +static inline int __must_check spu_acquire(struct spu_context *ctx) +{ + return mutex_lock_interruptible(&ctx->state_mutex); +} + +static inline void spu_release(struct spu_context *ctx) +{ + mutex_unlock(&ctx->state_mutex); +} + +struct spu_context * alloc_spu_context(struct spu_gang *gang); +void destroy_spu_context(struct kref *kref); +struct spu_context * get_spu_context(struct spu_context *ctx); +int put_spu_context(struct spu_context *ctx); +void spu_unmap_mappings(struct spu_context *ctx); + +void spu_forget(struct spu_context *ctx); +int __must_check spu_acquire_saved(struct spu_context *ctx); +void spu_release_saved(struct spu_context *ctx); + +int spu_stopped(struct spu_context *ctx, u32 * stat); +void spu_del_from_rq(struct spu_context *ctx); +int spu_activate(struct spu_context *ctx, unsigned long flags); +void spu_deactivate(struct spu_context *ctx); +void spu_yield(struct spu_context *ctx); +void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, + u32 type, u32 val); +void spu_set_timeslice(struct spu_context *ctx); +void spu_update_sched_info(struct spu_context *ctx); +void __spu_update_sched_info(struct spu_context *ctx); +int __init spu_sched_init(void); +void spu_sched_exit(void); + +extern char *isolated_loader; + +/* + * spufs_wait + * Same as wait_event_interruptible(), except that here + * we need to call spu_release(ctx) before sleeping, and + * then spu_acquire(ctx) when awoken. + * + * Returns with state_mutex re-acquired when successful or + * with -ERESTARTSYS and the state_mutex dropped when interrupted. + */ + +#define spufs_wait(wq, condition) \ +({ \ + int __ret = 0; \ + DEFINE_WAIT(__wait); \ + for (;;) { \ + prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + spu_release(ctx); \ + if (signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ + } \ + schedule(); \ + __ret = spu_acquire(ctx); \ + if (__ret) \ + break; \ + } \ + finish_wait(&(wq), &__wait); \ + __ret; \ +}) + +size_t spu_wbox_write(struct spu_context *ctx, u32 data); +size_t spu_ibox_read(struct spu_context *ctx, u32 *data); + +/* irq callback funcs. */ +void spufs_ibox_callback(struct spu *spu); +void spufs_wbox_callback(struct spu *spu); +void spufs_stop_callback(struct spu *spu, int irq); +void spufs_mfc_callback(struct spu *spu); +void spufs_dma_callback(struct spu *spu, int type); + +struct spufs_coredump_reader { + char *name; + ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm); + u64 (*get)(struct spu_context *ctx); + size_t size; +}; +extern const struct spufs_coredump_reader spufs_coredump_read[]; + +extern int spu_init_csa(struct spu_state *csa); +extern void spu_fini_csa(struct spu_state *csa); +extern int spu_save(struct spu_state *prev, struct spu *spu); +extern int spu_restore(struct spu_state *new, struct spu *spu); +extern int spu_switch(struct spu_state *prev, struct spu_state *new, + struct spu *spu); +extern int spu_alloc_lscsa(struct spu_state *csa); +extern void spu_free_lscsa(struct spu_state *csa); + +extern void spuctx_switch_state(struct spu_context *ctx, + enum spu_utilization_state new_state); + +#endif diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h new file mode 100644 index 0000000000..1def11e911 --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/sputrace.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SPUFS_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM spufs + +TRACE_EVENT(spufs_context, + TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name), + TP_ARGS(ctx, spu, name), + + TP_STRUCT__entry( + __field(const char *, name) + __field(int, owner_tid) + __field(int, number) + ), + + TP_fast_assign( + __entry->name = name; + __entry->owner_tid = ctx->tid; + __entry->number = spu ? spu->number : -1; + ), + + TP_printk("%s (ctxthread = %d, spu = %d)", + __entry->name, __entry->owner_tid, __entry->number) +); + +#define spu_context_trace(name, ctx, spu) \ + trace_spufs_context(ctx, spu, __stringify(name)) +#define spu_context_nospu_trace(name, ctx) \ + trace_spufs_context(ctx, NULL, __stringify(name)) + +#endif /* _TRACE_SPUFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE sputrace +#include diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c new file mode 100644 index 0000000000..b41e81b22f --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -0,0 +1,2206 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * spu_switch.c + * + * (C) Copyright IBM Corp. 2005 + * + * Author: Mark Nutter + * + * Host-side part of SPU context switch sequence outlined in + * Synergistic Processor Element, Book IV. + * + * A fully premptive switch of an SPE is very expensive in terms + * of time and system resources. SPE Book IV indicates that SPE + * allocation should follow a "serially reusable device" model, + * in which the SPE is assigned a task until it completes. When + * this is not possible, this sequence may be used to premptively + * save, and then later (optionally) restore the context of a + * program executing on an SPE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "spufs.h" + +#include "spu_save_dump.h" +#include "spu_restore_dump.h" + +#if 0 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + } while (_c); \ + } +#else +#define RELAX_SPIN_COUNT 1000 +#define POLL_WHILE_TRUE(_c) { \ + do { \ + int _i; \ + for (_i=0; _iproblem; + u32 isolate_state; + + /* Save, Step 2: + * Save, Step 6: + * If SPU_Status[E,L,IS] any field is '1', this + * SPU is in isolate state and cannot be context + * saved at this time. + */ + isolate_state = SPU_STATUS_ISOLATED_STATE | + SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS; + return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0; +} + +static inline void disable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 3: + * Restore, Step 2: + * Save INT_Mask_class0 in CSA. + * Write INT_MASK_class0 with value of 0. + * Save INT_Mask_class1 in CSA. + * Write INT_MASK_class1 with value of 0. + * Save INT_Mask_class2 in CSA. + * Write INT_MASK_class2 with value of 0. + * Synchronize all three interrupts to be sure + * we no longer execute a handler on another CPU. + */ + spin_lock_irq(&spu->register_lock); + if (csa) { + csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0); + csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1); + csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2); + } + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + eieio(); + spin_unlock_irq(&spu->register_lock); + + /* + * This flag needs to be set before calling synchronize_irq so + * that the update will be visible to the relevant handlers + * via a simple load. + */ + set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags); + synchronize_irq(spu->irqs[0]); + synchronize_irq(spu->irqs[1]); + synchronize_irq(spu->irqs[2]); +} + +static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 4: + * Restore, Step 25. + * Set a software watchdog timer, which specifies the + * maximum allowable time for a context save sequence. + * + * For present, this implementation will not set a global + * watchdog timer, as virtualization & variable system load + * may cause unpredictable execution times. + */ +} + +static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 5: + * Restore, Step 3: + * Inhibit user-space access (if provided) to this + * SPU by unmapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. + */ +} + +static inline void set_switch_pending(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 7: + * Restore, Step 5: + * Set a software context switch pending flag. + * Done above in Step 3 - disable_interrupts(). + */ +} + +static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 8: + * Suspend DMA and save MFC_CNTL. + */ + switch (in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) { + case MFC_CNTL_SUSPEND_IN_PROGRESS: + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + fallthrough; + case MFC_CNTL_SUSPEND_COMPLETE: + if (csa) + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) | + MFC_CNTL_SUSPEND_DMA_QUEUE; + break; + case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION: + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); + if (csa) + csa->priv2.mfc_control_RW = + in_be64(&priv2->mfc_control_RW) & + ~MFC_CNTL_SUSPEND_DMA_QUEUE & + ~MFC_CNTL_SUSPEND_MASK; + break; + } +} + +static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 9: + * Save SPU_Runcntl in the CSA. This value contains + * the "Application Desired State". + */ + csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW); +} + +static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 10: + * Save MFC_SR1 in the CSA. + */ + csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu); +} + +static inline void save_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 11: + * Read SPU_Status[R], and save to CSA. + */ + if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) { + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } else { + u32 stopped; + + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + stopped = + SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if ((in_be32(&prob->spu_status_R) & stopped) == 0) + csa->prob.spu_status_R = SPU_STATUS_RUNNING; + else + csa->prob.spu_status_R = in_be32(&prob->spu_status_R); + } +} + +static inline void save_mfc_stopped_status(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + const u64 mask = MFC_CNTL_DECREMENTER_RUNNING | + MFC_CNTL_DMA_QUEUES_EMPTY; + + /* Save, Step 12: + * Read MFC_CNTL[Ds]. Update saved copy of + * CSA.MFC_CNTL[Ds]. + * + * update: do the same with MFC_CNTL[Q]. + */ + csa->priv2.mfc_control_RW &= ~mask; + csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask; +} + +static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 13: + * Write MFC_CNTL[Dh] set to a '1' to halt + * the decrementer. + */ + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK); + eieio(); +} + +static inline void save_timebase(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 14: + * Read PPE Timebase High and Timebase low registers + * and save in CSA. TBD. + */ + csa->suspend_time = get_cycles(); +} + +static inline void remove_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 15: + * Remove other SPU access to this SPU by unmapping + * this SPU's pages from their address space. TBD. + */ +} + +static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 16: + * Restore, Step 11. + * Write SPU_MSSync register. Poll SPU_MSSync[P] + * for a value of 0. + */ + out_be64(&prob->spc_mssync_RW, 1UL); + POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING); +} + +static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 17: + * Restore, Step 12. + * Restore, Step 48. + * Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register. + * Then issue a PPE sync instruction. + */ + spu_tlb_invalidate(spu); + mb(); +} + +static inline void handle_pending_interrupts(struct spu_state *csa, + struct spu *spu) +{ + /* Save, Step 18: + * Handle any pending interrupts from this SPU + * here. This is OS or hypervisor specific. One + * option is to re-enable interrupts to handle any + * pending interrupts, with the interrupt handlers + * recognizing the software Context Switch Pending + * flag, to ensure the SPU execution or MFC command + * queue is not restarted. TBD. + */ +} + +static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 19: + * If MFC_Cntl[Se]=0 then save + * MFC command queues. + */ + if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) { + for (i = 0; i < 8; i++) { + csa->priv2.puq[i].mfc_cq_data0_RW = + in_be64(&priv2->puq[i].mfc_cq_data0_RW); + csa->priv2.puq[i].mfc_cq_data1_RW = + in_be64(&priv2->puq[i].mfc_cq_data1_RW); + csa->priv2.puq[i].mfc_cq_data2_RW = + in_be64(&priv2->puq[i].mfc_cq_data2_RW); + csa->priv2.puq[i].mfc_cq_data3_RW = + in_be64(&priv2->puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + csa->priv2.spuq[i].mfc_cq_data0_RW = + in_be64(&priv2->spuq[i].mfc_cq_data0_RW); + csa->priv2.spuq[i].mfc_cq_data1_RW = + in_be64(&priv2->spuq[i].mfc_cq_data1_RW); + csa->priv2.spuq[i].mfc_cq_data2_RW = + in_be64(&priv2->spuq[i].mfc_cq_data2_RW); + csa->priv2.spuq[i].mfc_cq_data3_RW = + in_be64(&priv2->spuq[i].mfc_cq_data3_RW); + } + } +} + +static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 20: + * Save the PPU_QueryMask register + * in the CSA. + */ + csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW); +} + +static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 21: + * Save the PPU_QueryType register + * in the CSA. + */ + csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); +} + +static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save the Prxy_TagStatus register in the CSA. + * + * It is unnecessary to restore dma_tagstatus_R, however, + * dma_tagstatus_R in the CSA is accessed via backing_ops, so + * we must save it. + */ + csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R); +} + +static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 22: + * Save the MFC_CSR_TSQ register + * in the LSCSA. + */ + csa->priv2.spu_tag_status_query_RW = + in_be64(&priv2->spu_tag_status_query_RW); +} + +static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 23: + * Save the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers in the CSA. + */ + csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW); + csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW); +} + +static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 24: + * Save the MFC_CSR_ATO register in + * the CSA. + */ + csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW); +} + +static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 25: + * Save the MFC_TCLASS_ID register in + * the CSA. + */ + csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu); +} + +static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 26: + * Restore, Step 23. + * Write the MFC_TCLASS_ID register with + * the value 0x10000000. + */ + spu_mfc_tclass_id_set(spu, 0x10000000); + eieio(); +} + +static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 27: + * Restore, Step 14. + * Write MFC_CNTL[Pc]=1 (purge queue). + */ + out_be64(&priv2->mfc_control_RW, + MFC_CNTL_PURGE_DMA_REQUEST | + MFC_CNTL_SUSPEND_MASK); + eieio(); +} + +static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 28: + * Poll MFC_CNTL[Ps] until value '11' is read + * (purge complete). + */ + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_PURGE_DMA_STATUS_MASK) == + MFC_CNTL_PURGE_DMA_COMPLETE); +} + +static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 30: + * Restore, Step 18: + * Write MFC_SR1 with MFC_SR1[D=0,S=1] and + * MFC_SR1[TL,R,Pr,T] set correctly for the + * OS specific environment. + * + * Implementation note: The SPU-side code + * for save/restore is privileged, so the + * MFC_SR1[Pr] bit is not set. + * + */ + spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_RELOCATE_MASK | + MFC_STATE1_BUS_TLBIE_MASK)); +} + +static inline void save_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 31: + * Save SPU_NPC in the CSA. + */ + csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW); +} + +static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 32: + * Save SPU_PrivCntl in the CSA. + */ + csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW); +} + +static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 33: + * Restore, Step 16: + * Write SPU_PrivCntl[S,Le,A] fields reset to 0. + */ + out_be64(&priv2->spu_privcntl_RW, 0UL); + eieio(); +} + +static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 34: + * Save SPU_LSLR in the CSA. + */ + csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW); +} + +static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 35: + * Restore, Step 17. + * Reset SPU_LSLR. + */ + out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK); + eieio(); +} + +static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 36: + * Save SPU_Cfg in the CSA. + */ + csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW); +} + +static inline void save_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 37: + * Save PM_Trace_Tag_Wait_Mask in the CSA. + * Not performed by this implementation. + */ +} + +static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 38: + * Save RA_GROUP_ID register and the + * RA_ENABLE reigster in the CSA. + */ + csa->priv1.resource_allocation_groupID_RW = + spu_resource_allocation_groupID_get(spu); + csa->priv1.resource_allocation_enable_RW = + spu_resource_allocation_enable_get(spu); +} + +static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 39: + * Save MB_Stat register in the CSA. + */ + csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R); +} + +static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 40: + * Save the PPU_MB register in the CSA. + */ + csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R); +} + +static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 41: + * Save the PPUINT_MB register in the CSA. + */ + csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R); +} + +static inline void save_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Save, Step 42: + */ + + /* Save CH 1, without channel count */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW); + + /* Save the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW); + csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void save_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Save, Step 43: + * Save SPU Read Mailbox Channel. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW); + for (i = 0; i < 4; i++) { + csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW); + } + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); +} + +static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 44: + * Save MFC_CMD Channel. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 21UL); + eieio(); + csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW); + eieio(); +} + +static inline void reset_ch(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL }; + u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL }; + u64 idx; + int i; + + /* Save, Step 45: + * Reset the following CH: [21, 23, 28, 30] + */ + for (i = 0; i < 4; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Save, Step 46: + * Restore, Step 25. + * Write MFC_CNTL[Sc]=0 (resume queue processing). + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); +} + +static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu, + unsigned int *code, int code_size) +{ + /* Save, Step 47: + * Restore, Step 30. + * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All + * register, then initialize SLB_VSID and SLB_ESID + * to provide access to SPU context save code and + * LSCSA. + * + * This implementation places both the context + * switch code and LSCSA in kernel address space. + * + * Further this implementation assumes that the + * MFC_SR1[R]=1 (in other words, assume that + * translation is desired by OS environment). + */ + spu_invalidate_slbs(spu); + spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size); +} + +static inline void set_switch_active(struct spu_state *csa, struct spu *spu) +{ + /* Save, Step 48: + * Restore, Step 23. + * Change the software context switch pending flag + * to context switch active. This implementation does + * not uses a switch active flag. + * + * Now that we have saved the mfc in the csa, we can add in the + * restart command if an exception occurred. + */ + if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags)) + csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND; + clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); + mb(); +} + +static inline void enable_interrupts(struct spu_state *csa, struct spu *spu) +{ + unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR | + CLASS1_ENABLE_STORAGE_FAULT_INTR; + + /* Save, Step 49: + * Restore, Step 22: + * Reset and then enable interrupts, as + * needed by OS. + * + * This implementation enables only class1 + * (translation) interrupts. + */ + spin_lock_irq(&spu->register_lock); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, class1_mask); + spu_int_mask_set(spu, 2, 0ul); + spin_unlock_irq(&spu->register_lock); +} + +static inline int send_mfc_dma(struct spu *spu, unsigned long ea, + unsigned int ls_offset, unsigned int size, + unsigned int tag, unsigned int rclass, + unsigned int cmd) +{ + struct spu_problem __iomem *prob = spu->problem; + union mfc_tag_size_class_cmd command; + unsigned int transfer_size; + volatile unsigned int status = 0x0; + + while (size > 0) { + transfer_size = + (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size; + command.u.mfc_size = transfer_size; + command.u.mfc_tag = tag; + command.u.mfc_rclassid = rclass; + command.u.mfc_cmd = cmd; + do { + out_be32(&prob->mfc_lsa_W, ls_offset); + out_be64(&prob->mfc_ea_W, ea); + out_be64(&prob->mfc_union_W.all64, command.all64); + status = + in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + if (unlikely(status & 0x2)) { + cpu_relax(); + } + } while (status & 0x3); + size -= transfer_size; + ea += transfer_size; + ls_offset += transfer_size; + } + return 0; +} + +static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_PUT_CMD; + + /* Save, Step 50: + * Issue a DMA command to copy the first 16K bytes + * of local storage to the CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 51: + * Restore, Step 31. + * Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry + * point address of context save code in local + * storage. + * + * This implementation uses SPU-side save/restore + * programs with entry points at LSA of 0. + */ + out_be32(&prob->spu_npc_RW, 0); + eieio(); +} + +static inline void set_signot1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 52: + * Restore, Step 32: + * Write SPU_Sig_Notify_1 register with upper 32-bits + * of the CSA.LSCSA effective address. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify1, addr64.ui[0]); + eieio(); +} + +static inline void set_signot2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + union { + u64 ull; + u32 ui[2]; + } addr64; + + /* Save, Step 53: + * Restore, Step 33: + * Write SPU_Sig_Notify_2 register with lower 32-bits + * of the CSA.LSCSA effective address. + */ + addr64.ull = (u64) csa->lscsa; + out_be32(&prob->signal_notify2, addr64.ui[1]); + eieio(); +} + +static inline void send_save_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_save_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_save_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Save, Step 54: + * Issue a DMA command to copy context save code + * to local storage and start SPU. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save, Step 55: + * Restore, Step 38. + * Write PPU_QueryMask=1 (enable Tag Group 0) + * and issue eieio instruction. + */ + out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0)); + eieio(); +} + +static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask = MFC_TAGID_TO_TAGMASK(0); + unsigned long flags; + + /* Save, Step 56: + * Restore, Step 39. + * Restore, Step 39. + * Restore, Step 46. + * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete) + * or write PPU_QueryType[TS]=01 and wait for Tag Group + * Complete Interrupt. Write INT_Stat_Class0 or + * INT_Stat_Class2 with value of 'handled'. + */ + POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask); + + local_irq_save(flags); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + local_irq_restore(flags); +} + +static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + unsigned long flags; + + /* Save, Step 57: + * Restore, Step 40. + * Poll until SPU_Status[R]=0 or wait for SPU Class 0 + * or SPU Class 2 interrupt. Write INT_Stat_class0 + * or INT_Stat_class2 with value of handled. + */ + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + local_irq_save(flags); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + local_irq_restore(flags); +} + +static inline int check_save_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Save, Step 54: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context save succeeded, otherwise context save + * failed. + */ + complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 4: + * If required, notify the "using application" that + * the SPU task has been terminated. TBD. + */ +} + +static inline void suspend_mfc_and_halt_decr(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 7: + * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend + * the queue and halt the decrementer. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE | + MFC_CNTL_DECREMENTER_HALTED); + eieio(); +} + +static inline void wait_suspend_mfc_complete(struct spu_state *csa, + struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 8: + * Restore, Step 47. + * Poll MFC_CNTL[Ss] until 11 is returned. + */ + POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) & + MFC_CNTL_SUSPEND_DMA_STATUS_MASK) == + MFC_CNTL_SUSPEND_COMPLETE); +} + +static inline int suspend_spe(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 9: + * If SPU_Status[R]=1, stop SPU execution + * and wait for stop to complete. + * + * Returns 1 if SPU_Status[R]=1 on entry. + * 0 otherwise + */ + if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STATUS) { + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STATUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_WAITING_FOR_CHANNEL) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + return 1; + } + return 0; +} + +static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 10: + * If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1, + * release SPU from isolate state. + */ + if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) { + if (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_EXIT_STATUS) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + if ((in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_LOAD_STATUS) + || (in_be32(&prob->spu_status_R) & + SPU_STATUS_ISOLATED_STATE)) { + spu_mfc_sr1_set(spu, + MFC_STATE1_MASTER_RUN_CONTROL_MASK); + eieio(); + out_be32(&prob->spu_runcntl_RW, 0x2); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } + } +} + +static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + u64 idx; + int i; + + /* Restore, Step 20: + */ + + /* Reset CH 1 */ + out_be64(&priv2->spu_chnlcntptr_RW, 1); + out_be64(&priv2->spu_chnldata_RW, 0UL); + + /* Reset the following CH: [0,3,4,24,25,27] */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, 0UL); + out_be64(&priv2->spu_chnlcnt_RW, 0UL); + eieio(); + } +} + +static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL }; + u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 21: + * Reset the following CH: [21, 23, 28, 29, 30] + */ + for (i = 0; i < 5; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void setup_spu_status_part1(struct spu_state *csa, + struct spu *spu) +{ + u32 status_P = SPU_STATUS_STOPPED_BY_STOP; + u32 status_I = SPU_STATUS_INVALID_INSTR; + u32 status_H = SPU_STATUS_STOPPED_BY_HALT; + u32 status_S = SPU_STATUS_SINGLE_STEP; + u32 status_S_I = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR; + u32 status_S_P = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_H = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP; + u32 status_P_I = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR; + u32 status_code; + + /* Restore, Step 27: + * If the CSA.SPU_Status[I,S,H,P]=1 then add the correct + * instruction sequence to the end of the SPU based restore + * code (after the "context restored" stop and signal) to + * restore the correct SPU status. + * + * NOTE: Rather than modifying the SPU executable, we + * instead add a new 'stopped_status' field to the + * LSCSA. The SPU-side restore reads this field and + * takes the appropriate action when exiting. + */ + + status_code = + (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF; + if ((csa->prob.spu_status_R & status_P_I) == status_P_I) { + + /* SPU_Status[P,I]=1 - Illegal Instruction followed + * by Stop and Signal instruction, followed by 'br -4'. + * + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_P_H) == status_P_H) { + + /* SPU_Status[P,H]=1 - Halt Conditional, followed + * by Stop and Signal instruction, followed by + * 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_S_P) == status_S_P) { + + /* SPU_Status[S,P]=1 - Stop and Signal instruction + * followed by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_S_I) == status_S_I) { + + /* SPU_Status[S,I]=1 - Illegal instruction followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_P) == status_P) { + + /* SPU_Status[P]=1 - Stop and Signal instruction + * followed by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P; + csa->lscsa->stopped_status.slot[1] = status_code; + + } else if ((csa->prob.spu_status_R & status_H) == status_H) { + + /* SPU_Status[H]=1 - Halt Conditional, followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H; + + } else if ((csa->prob.spu_status_R & status_S) == status_S) { + + /* SPU_Status[S]=1 - Two nop instructions. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S; + + } else if ((csa->prob.spu_status_R & status_I) == status_I) { + + /* SPU_Status[I]=1 - Illegal instruction followed + * by 'br -4'. + */ + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I; + + } +} + +static inline void setup_spu_status_part2(struct spu_state *csa, + struct spu *spu) +{ + u32 mask; + + /* Restore, Step 28: + * If the CSA.SPU_Status[I,S,H,P,R]=0 then + * add a 'br *' instruction to the end of + * the SPU based restore code. + * + * NOTE: Rather than modifying the SPU executable, we + * instead add a new 'stopped_status' field to the + * LSCSA. The SPU-side restore reads this field and + * takes the appropriate action when exiting. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R; + } +} + +static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 29: + * Restore RA_GROUP_ID register and the + * RA_ENABLE reigster from the CSA. + */ + spu_resource_allocation_groupID_set(spu, + csa->priv1.resource_allocation_groupID_RW); + spu_resource_allocation_enable_set(spu, + csa->priv1.resource_allocation_enable_RW); +} + +static inline void send_restore_code(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&spu_restore_code[0]; + unsigned int ls_offset = 0x0; + unsigned int size = sizeof(spu_restore_code); + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GETFS_CMD; + + /* Restore, Step 37: + * Issue MFC DMA command to copy context + * restore code to local storage. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void setup_decr(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 34: + * If CSA.MFC_CNTL[Ds]=1 (decrementer was + * running) then adjust decrementer, set + * decrementer running status in LSCSA, + * and set decrementer "wrapped" status + * in LSCSA. + */ + if (csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING) { + cycles_t resume_time = get_cycles(); + cycles_t delta_time = resume_time - csa->suspend_time; + + csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING; + if (csa->lscsa->decr.slot[0] < delta_time) { + csa->lscsa->decr_status.slot[0] |= + SPU_DECR_STATUS_WRAPPED; + } + + csa->lscsa->decr.slot[0] -= delta_time; + } else { + csa->lscsa->decr_status.slot[0] = 0; + } +} + +static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 35: + * Copy the CSA.PU_MB data into the LSCSA. + */ + csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R; +} + +static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 36: + * Copy the CSA.PUINT_MB data into the LSCSA. + */ + csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R; +} + +static inline int check_restore_status(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 complete; + + /* Restore, Step 40: + * If SPU_Status[P]=1 and SPU_Status[SC] = "success", + * context restore succeeded, otherwise context restore + * failed. + */ + complete = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) | + SPU_STATUS_STOPPED_BY_STOP); + return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0; +} + +static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 41: + * Restore SPU_PrivCntl from the CSA. + */ + out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW); + eieio(); +} + +static inline void restore_status_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 42: + * If any CSA.SPU_Status[I,S,H,P]=1, then + * restore the error or single step state. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP; + if (csa->prob.spu_status_R & mask) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_status_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + u32 mask; + + /* Restore, Step 43: + * If all CSA.SPU_Status[I,S,H,P,R]=0 then write + * SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1, + * then write '00' to SPU_RunCntl[R0R1] and wait + * for SPU_Status[R]=0. + */ + mask = SPU_STATUS_INVALID_INSTR | + SPU_STATUS_SINGLE_STEP | + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING; + if (!(csa->prob.spu_status_R & mask)) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + eieio(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & + SPU_STATUS_RUNNING); + } +} + +static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu) +{ + unsigned long addr = (unsigned long)&csa->lscsa->ls[0]; + unsigned int ls_offset = 0x0; + unsigned int size = 16384; + unsigned int tag = 0; + unsigned int rclass = 0; + unsigned int cmd = MFC_GET_CMD; + + /* Restore, Step 44: + * Issue a DMA command to restore the first + * 16kb of local storage from CSA. + */ + send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd); +} + +static inline void suspend_mfc(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 47. + * Write MFC_Cntl[Sc,Sm]='1','0' to suspend + * the queue. + */ + out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE); + eieio(); +} + +static inline void clear_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 49: + * Write INT_MASK_class0 with value of 0. + * Write INT_MASK_class1 with value of 0. + * Write INT_MASK_class2 with value of 0. + * Write INT_STAT_class0 with value of -1. + * Write INT_STAT_class1 with value of -1. + * Write INT_STAT_class2 with value of -1. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, 0ul); + spu_int_mask_set(spu, 1, 0ul); + spu_int_mask_set(spu, 2, 0ul); + spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK); + spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK); + spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK); + spin_unlock_irq(&spu->register_lock); +} + +static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 50: + * If MFC_Cntl[Se]!=0 then restore + * MFC command queues. + */ + if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) { + for (i = 0; i < 8; i++) { + out_be64(&priv2->puq[i].mfc_cq_data0_RW, + csa->priv2.puq[i].mfc_cq_data0_RW); + out_be64(&priv2->puq[i].mfc_cq_data1_RW, + csa->priv2.puq[i].mfc_cq_data1_RW); + out_be64(&priv2->puq[i].mfc_cq_data2_RW, + csa->priv2.puq[i].mfc_cq_data2_RW); + out_be64(&priv2->puq[i].mfc_cq_data3_RW, + csa->priv2.puq[i].mfc_cq_data3_RW); + } + for (i = 0; i < 16; i++) { + out_be64(&priv2->spuq[i].mfc_cq_data0_RW, + csa->priv2.spuq[i].mfc_cq_data0_RW); + out_be64(&priv2->spuq[i].mfc_cq_data1_RW, + csa->priv2.spuq[i].mfc_cq_data1_RW); + out_be64(&priv2->spuq[i].mfc_cq_data2_RW, + csa->priv2.spuq[i].mfc_cq_data2_RW); + out_be64(&priv2->spuq[i].mfc_cq_data3_RW, + csa->priv2.spuq[i].mfc_cq_data3_RW); + } + } + eieio(); +} + +static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 51: + * Restore the PPU_QueryMask register from CSA. + */ + out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW); + eieio(); +} + +static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 52: + * Restore the PPU_QueryType register from CSA. + */ + out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW); + eieio(); +} + +static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 53: + * Restore the MFC_CSR_TSQ register from CSA. + */ + out_be64(&priv2->spu_tag_status_query_RW, + csa->priv2.spu_tag_status_query_RW); + eieio(); +} + +static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 54: + * Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2 + * registers from CSA. + */ + out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW); + out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW); + eieio(); +} + +static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 55: + * Restore the MFC_CSR_ATO register from CSA. + */ + out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW); +} + +static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 56: + * Restore the MFC_TCLASS_ID register from CSA. + */ + spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW); + eieio(); +} + +static inline void set_llr_event(struct spu_state *csa, struct spu *spu) +{ + u64 ch0_cnt, ch0_data; + u64 ch1_data; + + /* Restore, Step 57: + * Set the Lock Line Reservation Lost Event by: + * 1. OR CSA.SPU_Event_Status with bit 21 (Lr) set to 1. + * 2. If CSA.SPU_Channel_0_Count=0 and + * CSA.SPU_Wr_Event_Mask[Lr]=1 and + * CSA.SPU_Event_Status[Lr]=0 then set + * CSA.SPU_Event_Status_Count=1. + */ + ch0_cnt = csa->spu_chnlcnt_RW[0]; + ch0_data = csa->spu_chnldata_RW[0]; + ch1_data = csa->spu_chnldata_RW[1]; + csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT; + if ((ch0_cnt == 0) && !(ch0_data & MFC_LLR_LOST_EVENT) && + (ch1_data & MFC_LLR_LOST_EVENT)) { + csa->spu_chnlcnt_RW[0] = 1; + } +} + +static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 58: + * If the status of the CSA software decrementer + * "wrapped" flag is set, OR in a '1' to + * CSA.SPU_Event_Status[Tm]. + */ + if (!(csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED)) + return; + + if ((csa->spu_chnlcnt_RW[0] == 0) && + (csa->spu_chnldata_RW[1] & 0x20) && + !(csa->spu_chnldata_RW[0] & 0x20)) + csa->spu_chnlcnt_RW[0] = 1; + + csa->spu_chnldata_RW[0] |= 0x20; +} + +static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL }; + int i; + + /* Restore, Step 59: + * Restore the following CH: [0,3,4,24,25,27] + */ + for (i = 0; i < ARRAY_SIZE(ch_indices); i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]); + eieio(); + } +} + +static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + u64 ch_indices[3] = { 9UL, 21UL, 23UL }; + u64 ch_counts[3] = { 1UL, 16UL, 1UL }; + u64 idx; + int i; + + /* Restore, Step 60: + * Restore the following CH: [9,21,23]. + */ + ch_counts[0] = 1UL; + ch_counts[1] = csa->spu_chnlcnt_RW[21]; + ch_counts[2] = 1UL; + for (i = 0; i < 3; i++) { + idx = ch_indices[i]; + out_be64(&priv2->spu_chnlcntptr_RW, idx); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]); + eieio(); + } +} + +static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 61: + * Restore the SPU_LSLR register from CSA. + */ + out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW); + eieio(); +} + +static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 62: + * Restore the SPU_Cfg register from CSA. + */ + out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW); + eieio(); +} + +static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 63: + * Restore PM_Trace_Tag_Wait_Mask from CSA. + * Not performed by this implementation. + */ +} + +static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 64: + * Restore SPU_NPC from CSA. + */ + out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW); + eieio(); +} + +static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + int i; + + /* Restore, Step 65: + * Restore MFC_RdSPU_MB from CSA. + */ + out_be64(&priv2->spu_chnlcntptr_RW, 29UL); + eieio(); + out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]); + for (i = 0; i < 4; i++) { + out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]); + } + eieio(); +} + +static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 66: + * If CSA.MB_Stat[P]=0 (mailbox empty) then + * read from the PPU_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF) == 0) { + in_be32(&prob->pu_mb_R); + eieio(); + } +} + +static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 66: + * If CSA.MB_Stat[I]=0 (mailbox empty) then + * read from the PPUINT_MB register. + */ + if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { + in_be64(&priv2->puint_mb_R); + eieio(); + spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); + eieio(); + } +} + +static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 69: + * Restore the MFC_SR1 register from CSA. + */ + spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW); + eieio(); +} + +static inline void set_int_route(struct spu_state *csa, struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + spu_cpu_affinity_set(spu, ctx->last_ran); +} + +static inline void restore_other_spu_access(struct spu_state *csa, + struct spu *spu) +{ + /* Restore, Step 70: + * Restore other SPU mappings to this SPU. TBD. + */ +} + +static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Restore, Step 71: + * If CSA.SPU_Status[R]=1 then write + * SPU_RunCntl[R0R1]='01'. + */ + if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) { + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE); + eieio(); + } +} + +static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu) +{ + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Restore, Step 72: + * Restore the MFC_CNTL register for the CSA. + */ + out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW); + eieio(); + + /* + * The queue is put back into the same state that was evident prior to + * the context switch. The suspend flag is added to the saved state in + * the csa, if the operational state was suspending or suspended. In + * this case, the code that suspended the mfc is responsible for + * continuing it. Note that SPE faults do not change the operational + * state of the spu. + */ +} + +static inline void enable_user_access(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 73: + * Enable user-space access (if provided) to this + * SPU by mapping the virtual pages assigned to + * the SPU memory-mapped I/O (MMIO) for problem + * state. TBD. + */ +} + +static inline void reset_switch_active(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 74: + * Reset the "context switch active" flag. + * Not performed by this implementation. + */ +} + +static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu) +{ + /* Restore, Step 75: + * Re-enable SPU interrupts. + */ + spin_lock_irq(&spu->register_lock); + spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW); + spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW); + spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW); + spin_unlock_irq(&spu->register_lock); +} + +static int quiece_spu(struct spu_state *prev, struct spu *spu) +{ + /* + * Combined steps 2-18 of SPU context save sequence, which + * quiesce the SPU state (disable SPU execution, MFC command + * queues, decrementer, SPU interrupts, etc.). + * + * Returns 0 on success. + * 2 if failed step 2. + * 6 if failed step 6. + */ + + if (check_spu_isolate(prev, spu)) { /* Step 2. */ + return 2; + } + disable_interrupts(prev, spu); /* Step 3. */ + set_watchdog_timer(prev, spu); /* Step 4. */ + inhibit_user_access(prev, spu); /* Step 5. */ + if (check_spu_isolate(prev, spu)) { /* Step 6. */ + return 6; + } + set_switch_pending(prev, spu); /* Step 7. */ + save_mfc_cntl(prev, spu); /* Step 8. */ + save_spu_runcntl(prev, spu); /* Step 9. */ + save_mfc_sr1(prev, spu); /* Step 10. */ + save_spu_status(prev, spu); /* Step 11. */ + save_mfc_stopped_status(prev, spu); /* Step 12. */ + halt_mfc_decr(prev, spu); /* Step 13. */ + save_timebase(prev, spu); /* Step 14. */ + remove_other_spu_access(prev, spu); /* Step 15. */ + do_mfc_mssync(prev, spu); /* Step 16. */ + issue_mfc_tlbie(prev, spu); /* Step 17. */ + handle_pending_interrupts(prev, spu); /* Step 18. */ + + return 0; +} + +static void save_csa(struct spu_state *prev, struct spu *spu) +{ + /* + * Combine steps 19-44 of SPU context save sequence, which + * save regions of the privileged & problem state areas. + */ + + save_mfc_queues(prev, spu); /* Step 19. */ + save_ppu_querymask(prev, spu); /* Step 20. */ + save_ppu_querytype(prev, spu); /* Step 21. */ + save_ppu_tagstatus(prev, spu); /* NEW. */ + save_mfc_csr_tsq(prev, spu); /* Step 22. */ + save_mfc_csr_cmd(prev, spu); /* Step 23. */ + save_mfc_csr_ato(prev, spu); /* Step 24. */ + save_mfc_tclass_id(prev, spu); /* Step 25. */ + set_mfc_tclass_id(prev, spu); /* Step 26. */ + save_mfc_cmd(prev, spu); /* Step 26a - moved from 44. */ + purge_mfc_queue(prev, spu); /* Step 27. */ + wait_purge_complete(prev, spu); /* Step 28. */ + setup_mfc_sr1(prev, spu); /* Step 30. */ + save_spu_npc(prev, spu); /* Step 31. */ + save_spu_privcntl(prev, spu); /* Step 32. */ + reset_spu_privcntl(prev, spu); /* Step 33. */ + save_spu_lslr(prev, spu); /* Step 34. */ + reset_spu_lslr(prev, spu); /* Step 35. */ + save_spu_cfg(prev, spu); /* Step 36. */ + save_pm_trace(prev, spu); /* Step 37. */ + save_mfc_rag(prev, spu); /* Step 38. */ + save_ppu_mb_stat(prev, spu); /* Step 39. */ + save_ppu_mb(prev, spu); /* Step 40. */ + save_ppuint_mb(prev, spu); /* Step 41. */ + save_ch_part1(prev, spu); /* Step 42. */ + save_spu_mb(prev, spu); /* Step 43. */ + reset_ch(prev, spu); /* Step 45. */ +} + +static void save_lscsa(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 46-57 of SPU context save sequence, + * which save regions of the local store and register + * file. + */ + + resume_mfc_queue(prev, spu); /* Step 46. */ + /* Step 47. */ + setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code)); + set_switch_active(prev, spu); /* Step 48. */ + enable_interrupts(prev, spu); /* Step 49. */ + save_ls_16kb(prev, spu); /* Step 50. */ + set_spu_npc(prev, spu); /* Step 51. */ + set_signot1(prev, spu); /* Step 52. */ + set_signot2(prev, spu); /* Step 53. */ + send_save_code(prev, spu); /* Step 54. */ + set_ppu_querymask(prev, spu); /* Step 55. */ + wait_tag_complete(prev, spu); /* Step 56. */ + wait_spu_stopped(prev, spu); /* Step 57. */ +} + +static void force_spu_isolate_exit(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Stop SPE execution and wait for completion. */ + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + iobarrier_rw(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + /* Restart SPE master runcntl. */ + spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); + iobarrier_w(); + + /* Initiate isolate exit request and wait for completion. */ + out_be64(&priv2->spu_privcntl_RW, 4LL); + iobarrier_w(); + out_be32(&prob->spu_runcntl_RW, 2); + iobarrier_rw(); + POLL_WHILE_FALSE((in_be32(&prob->spu_status_R) + & SPU_STATUS_STOPPED_BY_STOP)); + + /* Reset load request to normal. */ + out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL); + iobarrier_w(); +} + +/** + * stop_spu_isolate + * Check SPU run-control state and force isolated + * exit function as necessary. + */ +static void stop_spu_isolate(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) { + /* The SPU is in isolated state; the only way + * to get it out is to perform an isolated + * exit (clean) operation. + */ + force_spu_isolate_exit(spu); + } +} + +static void harvest(struct spu_state *prev, struct spu *spu) +{ + /* + * Perform steps 2-25 of SPU context restore sequence, + * which resets an SPU either after a failed save, or + * when using SPU for first time. + */ + + disable_interrupts(prev, spu); /* Step 2. */ + inhibit_user_access(prev, spu); /* Step 3. */ + terminate_spu_app(prev, spu); /* Step 4. */ + set_switch_pending(prev, spu); /* Step 5. */ + stop_spu_isolate(spu); /* NEW. */ + remove_other_spu_access(prev, spu); /* Step 6. */ + suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */ + wait_suspend_mfc_complete(prev, spu); /* Step 8. */ + if (!suspend_spe(prev, spu)) /* Step 9. */ + clear_spu_status(prev, spu); /* Step 10. */ + do_mfc_mssync(prev, spu); /* Step 11. */ + issue_mfc_tlbie(prev, spu); /* Step 12. */ + handle_pending_interrupts(prev, spu); /* Step 13. */ + purge_mfc_queue(prev, spu); /* Step 14. */ + wait_purge_complete(prev, spu); /* Step 15. */ + reset_spu_privcntl(prev, spu); /* Step 16. */ + reset_spu_lslr(prev, spu); /* Step 17. */ + setup_mfc_sr1(prev, spu); /* Step 18. */ + spu_invalidate_slbs(spu); /* Step 19. */ + reset_ch_part1(prev, spu); /* Step 20. */ + reset_ch_part2(prev, spu); /* Step 21. */ + enable_interrupts(prev, spu); /* Step 22. */ + set_switch_active(prev, spu); /* Step 23. */ + set_mfc_tclass_id(prev, spu); /* Step 24. */ + resume_mfc_queue(prev, spu); /* Step 25. */ +} + +static void restore_lscsa(struct spu_state *next, struct spu *spu) +{ + /* + * Perform steps 26-40 of SPU context restore sequence, + * which restores regions of the local store and register + * file. + */ + + set_watchdog_timer(next, spu); /* Step 26. */ + setup_spu_status_part1(next, spu); /* Step 27. */ + setup_spu_status_part2(next, spu); /* Step 28. */ + restore_mfc_rag(next, spu); /* Step 29. */ + /* Step 30. */ + setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code)); + set_spu_npc(next, spu); /* Step 31. */ + set_signot1(next, spu); /* Step 32. */ + set_signot2(next, spu); /* Step 33. */ + setup_decr(next, spu); /* Step 34. */ + setup_ppu_mb(next, spu); /* Step 35. */ + setup_ppuint_mb(next, spu); /* Step 36. */ + send_restore_code(next, spu); /* Step 37. */ + set_ppu_querymask(next, spu); /* Step 38. */ + wait_tag_complete(next, spu); /* Step 39. */ + wait_spu_stopped(next, spu); /* Step 40. */ +} + +static void restore_csa(struct spu_state *next, struct spu *spu) +{ + /* + * Combine steps 41-76 of SPU context restore sequence, which + * restore regions of the privileged & problem state areas. + */ + + restore_spu_privcntl(next, spu); /* Step 41. */ + restore_status_part1(next, spu); /* Step 42. */ + restore_status_part2(next, spu); /* Step 43. */ + restore_ls_16kb(next, spu); /* Step 44. */ + wait_tag_complete(next, spu); /* Step 45. */ + suspend_mfc(next, spu); /* Step 46. */ + wait_suspend_mfc_complete(next, spu); /* Step 47. */ + issue_mfc_tlbie(next, spu); /* Step 48. */ + clear_interrupts(next, spu); /* Step 49. */ + restore_mfc_queues(next, spu); /* Step 50. */ + restore_ppu_querymask(next, spu); /* Step 51. */ + restore_ppu_querytype(next, spu); /* Step 52. */ + restore_mfc_csr_tsq(next, spu); /* Step 53. */ + restore_mfc_csr_cmd(next, spu); /* Step 54. */ + restore_mfc_csr_ato(next, spu); /* Step 55. */ + restore_mfc_tclass_id(next, spu); /* Step 56. */ + set_llr_event(next, spu); /* Step 57. */ + restore_decr_wrapped(next, spu); /* Step 58. */ + restore_ch_part1(next, spu); /* Step 59. */ + restore_ch_part2(next, spu); /* Step 60. */ + restore_spu_lslr(next, spu); /* Step 61. */ + restore_spu_cfg(next, spu); /* Step 62. */ + restore_pm_trace(next, spu); /* Step 63. */ + restore_spu_npc(next, spu); /* Step 64. */ + restore_spu_mb(next, spu); /* Step 65. */ + check_ppu_mb_stat(next, spu); /* Step 66. */ + check_ppuint_mb_stat(next, spu); /* Step 67. */ + spu_invalidate_slbs(spu); /* Modified Step 68. */ + restore_mfc_sr1(next, spu); /* Step 69. */ + set_int_route(next, spu); /* NEW */ + restore_other_spu_access(next, spu); /* Step 70. */ + restore_spu_runcntl(next, spu); /* Step 71. */ + restore_mfc_cntl(next, spu); /* Step 72. */ + enable_user_access(next, spu); /* Step 73. */ + reset_switch_active(next, spu); /* Step 74. */ + reenable_interrupts(next, spu); /* Step 75. */ +} + +static int __do_spu_save(struct spu_state *prev, struct spu *spu) +{ + int rc; + + /* + * SPU context save can be broken into three phases: + * + * (a) quiesce [steps 2-16]. + * (b) save of CSA, performed by PPE [steps 17-42] + * (c) save of LSCSA, mostly performed by SPU [steps 43-52]. + * + * Returns 0 on success. + * 2,6 if failed to quiece SPU + * 53 if SPU-side of save failed. + */ + + rc = quiece_spu(prev, spu); /* Steps 2-16. */ + switch (rc) { + default: + case 2: + case 6: + harvest(prev, spu); + return rc; + break; + case 0: + break; + } + save_csa(prev, spu); /* Steps 17-43. */ + save_lscsa(prev, spu); /* Steps 44-53. */ + return check_save_status(prev, spu); /* Step 54. */ +} + +static int __do_spu_restore(struct spu_state *next, struct spu *spu) +{ + int rc; + + /* + * SPU context restore can be broken into three phases: + * + * (a) harvest (or reset) SPU [steps 2-24]. + * (b) restore LSCSA [steps 25-40], mostly performed by SPU. + * (c) restore CSA [steps 41-76], performed by PPE. + * + * The 'harvest' step is not performed here, but rather + * as needed below. + */ + + restore_lscsa(next, spu); /* Steps 24-39. */ + rc = check_restore_status(next, spu); /* Step 40. */ + switch (rc) { + default: + /* Failed. Return now. */ + return rc; + break; + case 0: + /* Fall through to next step. */ + break; + } + restore_csa(next, spu); + + return 0; +} + +/** + * spu_save - SPU context save, with locking. + * @prev: pointer to SPU context save area, to be saved. + * @spu: pointer to SPU iomem structure. + * + * Acquire locks, perform the save operation then return. + */ +int spu_save(struct spu_state *prev, struct spu *spu) +{ + int rc; + + acquire_spu_lock(spu); /* Step 1. */ + rc = __do_spu_save(prev, spu); /* Steps 2-53. */ + release_spu_lock(spu); + if (rc != 0 && rc != 2 && rc != 6) { + panic("%s failed on SPU[%d], rc=%d.\n", + __func__, spu->number, rc); + } + return 0; +} +EXPORT_SYMBOL_GPL(spu_save); + +/** + * spu_restore - SPU context restore, with harvest and locking. + * @new: pointer to SPU context save area, to be restored. + * @spu: pointer to SPU iomem structure. + * + * Perform harvest + restore, as we may not be coming + * from a previous successful save operation, and the + * hardware state is unknown. + */ +int spu_restore(struct spu_state *new, struct spu *spu) +{ + int rc; + + acquire_spu_lock(spu); + harvest(NULL, spu); + spu->slb_replace = 0; + rc = __do_spu_restore(new, spu); + release_spu_lock(spu); + if (rc) { + panic("%s failed on SPU[%d] rc=%d.\n", + __func__, spu->number, rc); + } + return rc; +} +EXPORT_SYMBOL_GPL(spu_restore); + +static void init_prob(struct spu_state *csa) +{ + csa->spu_chnlcnt_RW[9] = 1; + csa->spu_chnlcnt_RW[21] = 16; + csa->spu_chnlcnt_RW[23] = 1; + csa->spu_chnlcnt_RW[28] = 1; + csa->spu_chnlcnt_RW[30] = 1; + csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP; + csa->prob.mb_stat_R = 0x000400; +} + +static void init_priv1(struct spu_state *csa) +{ + /* Enable decode, relocate, tlbie response, master runcntl. */ + csa->priv1.mfc_sr1_RW = MFC_STATE1_LOCAL_STORAGE_DECODE_MASK | + MFC_STATE1_MASTER_RUN_CONTROL_MASK | + MFC_STATE1_PROBLEM_STATE_MASK | + MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK; + + /* Enable OS-specific set of interrupts. */ + csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR | + CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR | + CLASS0_ENABLE_SPU_ERROR_INTR; + csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR | + CLASS1_ENABLE_STORAGE_FAULT_INTR; + csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR | + CLASS2_ENABLE_SPU_HALT_INTR | + CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR; +} + +static void init_priv2(struct spu_state *csa) +{ + csa->priv2.spu_lslr_RW = LS_ADDR_MASK; + csa->priv2.mfc_control_RW = MFC_CNTL_RESUME_DMA_QUEUE | + MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION | + MFC_CNTL_DMA_QUEUES_EMPTY_MASK; +} + +/** + * spu_alloc_csa - allocate and initialize an SPU context save area. + * + * Allocate and initialize the contents of an SPU context save area. + * This includes enabling address translation, interrupt masks, etc., + * as appropriate for the given OS environment. + * + * Note that storage for the 'lscsa' is allocated separately, + * as it is by far the largest of the context save regions, + * and may need to be pinned or otherwise specially aligned. + */ +int spu_init_csa(struct spu_state *csa) +{ + int rc; + + if (!csa) + return -EINVAL; + memset(csa, 0, sizeof(struct spu_state)); + + rc = spu_alloc_lscsa(csa); + if (rc) + return rc; + + spin_lock_init(&csa->register_lock); + + init_prob(csa); + init_priv1(csa); + init_priv2(csa); + + return 0; +} + +void spu_fini_csa(struct spu_state *csa) +{ + spu_free_lscsa(csa); +} diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c new file mode 100644 index 0000000000..157e046e6e --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include + +#include "spufs.h" + +/** + * sys_spu_run - run code loaded into an SPU + * + * @unpc: next program counter for the SPU + * @ustatus: status of the SPU + * + * This system call transfers the control of execution of a + * user space thread to an SPU. It will return when the + * SPU has finished executing or when it hits an error + * condition and it will be interrupted if a signal needs + * to be delivered to a handler in user space. + * + * The next program counter is set to the passed value + * before the SPU starts fetching code and the user space + * pointer gets updated with the new value when returning + * from kernel space. + * + * The status value returned from spu_run reflects the + * value of the spu_status register after the SPU has stopped. + * + */ +static long do_spu_run(struct file *filp, + __u32 __user *unpc, + __u32 __user *ustatus) +{ + long ret; + struct spufs_inode_info *i; + u32 npc, status; + + ret = -EFAULT; + if (get_user(npc, unpc)) + goto out; + + /* check if this file was created by spu_create */ + ret = -EINVAL; + if (filp->f_op != &spufs_context_fops) + goto out; + + i = SPUFS_I(file_inode(filp)); + ret = spufs_run_spu(i->i_ctx, &npc, &status); + + if (put_user(npc, unpc)) + ret = -EFAULT; + + if (ustatus && put_user(status, ustatus)) + ret = -EFAULT; +out: + return ret; +} + +static long do_spu_create(const char __user *pathname, unsigned int flags, + umode_t mode, struct file *neighbor) +{ + struct path path; + struct dentry *dentry; + int ret; + + dentry = user_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY); + ret = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + ret = spufs_create(&path, dentry, flags, mode, neighbor); + done_path_create(&path, dentry); + } + + return ret; +} + +struct spufs_calls spufs_calls = { + .create_thread = do_spu_create, + .spu_run = do_spu_run, + .notify_spus_active = do_notify_spus_active, + .owner = THIS_MODULE, +#ifdef CONFIG_COREDUMP + .coredump_extra_notes_size = spufs_coredump_extra_notes_size, + .coredump_extra_notes_write = spufs_coredump_extra_notes_write, +#endif +}; diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig new file mode 100644 index 0000000000..ff30ed579a --- /dev/null +++ b/arch/powerpc/platforms/chrp/Kconfig @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_CHRP + bool "Common Hardware Reference Platform (CHRP) based machines" + depends on PPC_BOOK3S_32 + select HAVE_PCSPKR_PLATFORM + select MPIC + select PPC_I8259 + select PPC_INDIRECT_PCI + select PPC_RTAS + select PPC_RTAS_DAEMON + select RTAS_ERROR_LOGGING + select PPC_MPC106 + select PPC_UDBG_16550 + select PPC_HASH_MMU_NATIVE + select FORCE_PCI + default y diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile new file mode 100644 index 0000000000..05639db9a3 --- /dev/null +++ b/arch/powerpc/platforms/chrp/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += setup.o time.o pegasos_eth.o pci.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_NVRAM:m=y) += nvram.o diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h new file mode 100644 index 0000000000..6ff4631d9d --- /dev/null +++ b/arch/powerpc/platforms/chrp/chrp.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Declarations of CHRP platform-specific things. + */ + +extern void chrp_nvram_init(void); +extern void chrp_get_rtc_time(struct rtc_time *); +extern int chrp_set_rtc_time(struct rtc_time *); +extern long chrp_time_init(void); + +extern void chrp_find_bridges(void); diff --git a/arch/powerpc/platforms/chrp/gg2.h b/arch/powerpc/platforms/chrp/gg2.h new file mode 100644 index 0000000000..341ae55b99 --- /dev/null +++ b/arch/powerpc/platforms/chrp/gg2.h @@ -0,0 +1,61 @@ +/* + * include/asm-ppc/gg2.h -- VLSI VAS96011/12 `Golden Gate 2' register definitions + * + * Copyright (C) 1997 Geert Uytterhoeven + * + * This file is based on the following documentation: + * + * The VAS96011/12 Chipset, Data Book, Edition 1.0 + * VLSI Technology, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#ifndef _ASMPPC_GG2_H +#define _ASMPPC_GG2_H + + /* + * Memory Map (CHRP mode) + */ + +#define GG2_PCI_MEM_BASE 0xc0000000 /* Peripheral memory space */ +#define GG2_ISA_MEM_BASE 0xf7000000 /* Peripheral memory alias */ +#define GG2_ISA_IO_BASE 0xf8000000 /* Peripheral I/O space */ +#define GG2_PCI_CONFIG_BASE 0xfec00000 /* PCI configuration space */ +#define GG2_INT_ACK_SPECIAL 0xfec80000 /* Interrupt acknowledge and */ + /* special PCI cycles */ +#define GG2_ROM_BASE0 0xff000000 /* ROM bank 0 */ +#define GG2_ROM_BASE1 0xff800000 /* ROM bank 1 */ + + + /* + * GG2 specific PCI Registers + */ + +extern void __iomem *gg2_pci_config_base; /* kernel virtual address */ + +#define GG2_PCI_BUSNO 0x40 /* Bus number */ +#define GG2_PCI_SUBBUSNO 0x41 /* Subordinate bus number */ +#define GG2_PCI_DISCCTR 0x42 /* Disconnect counter */ +#define GG2_PCI_PPC_CTRL 0x50 /* PowerPC interface control register */ +#define GG2_PCI_ADDR_MAP 0x5c /* Address map */ +#define GG2_PCI_PCI_CTRL 0x60 /* PCI interface control register */ +#define GG2_PCI_ROM_CTRL 0x70 /* ROM interface control register */ +#define GG2_PCI_ROM_TIME 0x74 /* ROM timing */ +#define GG2_PCI_CC_CTRL 0x80 /* Cache controller control register */ +#define GG2_PCI_DRAM_BANK0 0x90 /* Control register for DRAM bank #0 */ +#define GG2_PCI_DRAM_BANK1 0x94 /* Control register for DRAM bank #1 */ +#define GG2_PCI_DRAM_BANK2 0x98 /* Control register for DRAM bank #2 */ +#define GG2_PCI_DRAM_BANK3 0x9c /* Control register for DRAM bank #3 */ +#define GG2_PCI_DRAM_BANK4 0xa0 /* Control register for DRAM bank #4 */ +#define GG2_PCI_DRAM_BANK5 0xa4 /* Control register for DRAM bank #5 */ +#define GG2_PCI_DRAM_TIME0 0xb0 /* Timing parameters set #0 */ +#define GG2_PCI_DRAM_TIME1 0xb4 /* Timing parameters set #1 */ +#define GG2_PCI_DRAM_CTRL 0xc0 /* DRAM control */ +#define GG2_PCI_ERR_CTRL 0xd0 /* Error control register */ +#define GG2_PCI_ERR_STATUS 0xd4 /* Error status register */ + /* Cleared when read */ + +#endif /* _ASMPPC_GG2_H */ diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c new file mode 100644 index 0000000000..0eedae9649 --- /dev/null +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * c 2001 PPC 64 Team, IBM Corp + * + * /dev/nvram driver for PPC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "chrp.h" + +static unsigned int nvram_size; +static unsigned char nvram_buf[4]; +static DEFINE_SPINLOCK(nvram_lock); + +static unsigned char chrp_nvram_read_val(int addr) +{ + unsigned int done; + unsigned long flags; + unsigned char ret; + + if (addr >= nvram_size) { + printk(KERN_DEBUG "%s: read addr %d > nvram_size %u\n", + current->comm, addr, nvram_size); + return 0xff; + } + spin_lock_irqsave(&nvram_lock, flags); + if ((rtas_call(rtas_function_token(RTAS_FN_NVRAM_FETCH), 3, 2, &done, addr, + __pa(nvram_buf), 1) != 0) || 1 != done) + ret = 0xff; + else + ret = nvram_buf[0]; + spin_unlock_irqrestore(&nvram_lock, flags); + + return ret; +} + +static void chrp_nvram_write_val(int addr, unsigned char val) +{ + unsigned int done; + unsigned long flags; + + if (addr >= nvram_size) { + printk(KERN_DEBUG "%s: write addr %d > nvram_size %u\n", + current->comm, addr, nvram_size); + return; + } + spin_lock_irqsave(&nvram_lock, flags); + nvram_buf[0] = val; + if ((rtas_call(rtas_function_token(RTAS_FN_NVRAM_STORE), 3, 2, &done, addr, + __pa(nvram_buf), 1) != 0) || 1 != done) + printk(KERN_DEBUG "rtas IO error storing 0x%02x at %d", val, addr); + spin_unlock_irqrestore(&nvram_lock, flags); +} + +static ssize_t chrp_nvram_size(void) +{ + return nvram_size; +} + +void __init chrp_nvram_init(void) +{ + struct device_node *nvram; + const __be32 *nbytes_p; + unsigned int proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return; + + nbytes_p = of_get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); + return; + } + + nvram_size = be32_to_cpup(nbytes_p); + + printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read_val = chrp_nvram_read_val; + ppc_md.nvram_write_val = chrp_nvram_write_val; + ppc_md.nvram_size = chrp_nvram_size; + + return; +} + +MODULE_LICENSE("GPL v2"); diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c new file mode 100644 index 0000000000..428fd2a7b3 --- /dev/null +++ b/arch/powerpc/platforms/chrp/pci.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CHRP pci routines. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "chrp.h" +#include "gg2.h" + +/* LongTrail */ +void __iomem *gg2_pci_config_base; + +/* + * The VLSI Golden Gate II has only 512K of PCI configuration space, so we + * limit the bus number to 3 bits + */ + +static int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off, + int len, u32 *val) +{ + volatile void __iomem *cfg_data; + struct pci_controller *hose = pci_bus_to_host(bus); + + if (bus->number > 7) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that off is + * suitably aligned and that len is 1, 2 or 4. + */ + cfg_data = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off); + switch (len) { + case 1: + *val = in_8(cfg_data); + break; + case 2: + *val = in_le16(cfg_data); + break; + default: + *val = in_le32(cfg_data); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int gg2_write_config(struct pci_bus *bus, unsigned int devfn, int off, + int len, u32 val) +{ + volatile void __iomem *cfg_data; + struct pci_controller *hose = pci_bus_to_host(bus); + + if (bus->number > 7) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that off is + * suitably aligned and that len is 1, 2 or 4. + */ + cfg_data = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off); + switch (len) { + case 1: + out_8(cfg_data, val); + break; + case 2: + out_le16(cfg_data, val); + break; + default: + out_le32(cfg_data, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops gg2_pci_ops = +{ + .read = gg2_read_config, + .write = gg2_write_config, +}; + +/* + * Access functions for PCI config space using RTAS calls. + */ +static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int ret = -1; + int rval; + + rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len); + *val = ret; + return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL; +} + +static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8) + | (((bus->number - hose->first_busno) & 0xff) << 16) + | (hose->global_number << 24); + int rval; + + rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL, + addr, len, val); + return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops rtas_pci_ops = +{ + .read = rtas_read_config, + .write = rtas_write_config, +}; + +volatile struct Hydra __iomem *Hydra = NULL; + +static int __init hydra_init(void) +{ + struct device_node *np; + struct resource r; + + np = of_find_node_by_name(NULL, "mac-io"); + if (np == NULL || of_address_to_resource(np, 0, &r)) { + of_node_put(np); + return 0; + } + of_node_put(np); + Hydra = ioremap(r.start, resource_size(&r)); + printk("Hydra Mac I/O at %llx\n", (unsigned long long)r.start); + printk("Hydra Feature_Control was %x", + in_le32(&Hydra->Feature_Control)); + out_le32(&Hydra->Feature_Control, (HYDRA_FC_SCC_CELL_EN | + HYDRA_FC_SCSI_CELL_EN | + HYDRA_FC_SCCA_ENABLE | + HYDRA_FC_SCCB_ENABLE | + HYDRA_FC_ARB_BYPASS | + HYDRA_FC_MPIC_ENABLE | + HYDRA_FC_SLOW_SCC_PCLK | + HYDRA_FC_MPIC_IS_MASTER)); + printk(", now %x\n", in_le32(&Hydra->Feature_Control)); + return 1; +} + +#define PRG_CL_RESET_VALID 0x00010000 + +static void __init +setup_python(struct pci_controller *hose, struct device_node *dev) +{ + u32 __iomem *reg; + u32 val; + struct resource r; + + if (of_address_to_resource(dev, 0, &r)) { + printk(KERN_ERR "No address for Python PCI controller\n"); + return; + } + + /* Clear the magic go-slow bit */ + reg = ioremap(r.start + 0xf6000, 0x40); + BUG_ON(!reg); + val = in_be32(®[12]); + if (val & PRG_CL_RESET_VALID) { + out_be32(®[12], val & ~PRG_CL_RESET_VALID); + in_be32(®[12]); + } + iounmap(reg); + + setup_indirect_pci(hose, r.start + 0xf8000, r.start + 0xf8010, 0); +} + +/* Marvell Discovery II based Pegasos 2 */ +static void __init setup_peg2(struct pci_controller *hose, struct device_node *dev) +{ + struct device_node *root = of_find_node_by_path("/"); + struct device_node *rtas; + + rtas = of_find_node_by_name (root, "rtas"); + if (rtas) { + hose->ops = &rtas_pci_ops; + of_node_put(rtas); + } else { + printk ("RTAS supporting Pegasos OF not found, please upgrade" + " your firmware\n"); + } + pci_add_flags(PCI_REASSIGN_ALL_BUS); + /* keep the reference to the root node */ +} + +void __init +chrp_find_bridges(void) +{ + struct device_node *dev; + const int *bus_range; + int len, index = -1; + struct pci_controller *hose; + const unsigned int *dma; + const char *model, *machine; + int is_longtrail = 0, is_mot = 0, is_pegasos = 0; + struct device_node *root = of_find_node_by_path("/"); + struct resource r; + /* + * The PCI host bridge nodes on some machines don't have + * properties to adequately identify them, so we have to + * look at what sort of machine this is as well. + */ + machine = of_get_property(root, "model", NULL); + if (machine != NULL) { + is_longtrail = strncmp(machine, "IBM,LongTrail", 13) == 0; + is_mot = strncmp(machine, "MOT", 3) == 0; + if (strncmp(machine, "Pegasos2", 8) == 0) + is_pegasos = 2; + else if (strncmp(machine, "Pegasos", 7) == 0) + is_pegasos = 1; + } + for_each_child_of_node(root, dev) { + if (!of_node_is_type(dev, "pci")) + continue; + ++index; + /* The GG2 bridge on the LongTrail doesn't have an address */ + if (of_address_to_resource(dev, 0, &r) && !is_longtrail) { + printk(KERN_WARNING "Can't use %pOF: no address\n", + dev); + continue; + } + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %pOF\n", + dev); + continue; + } + if (bus_range[1] == bus_range[0]) + printk(KERN_INFO "PCI bus %d", bus_range[0]); + else + printk(KERN_INFO "PCI buses %d..%d", + bus_range[0], bus_range[1]); + printk(" controlled by %pOF", dev); + if (!is_longtrail) + printk(" at %llx", (unsigned long long)r.start); + printk("\n"); + + hose = pcibios_alloc_controller(dev); + if (!hose) { + printk("Can't allocate PCI controller structure for %pOF\n", + dev); + continue; + } + hose->first_busno = hose->self_busno = bus_range[0]; + hose->last_busno = bus_range[1]; + + model = of_get_property(dev, "model", NULL); + if (model == NULL) + model = ""; + if (strncmp(model, "IBM, Python", 11) == 0) { + setup_python(hose, dev); + } else if (is_mot + || strncmp(model, "Motorola, Grackle", 17) == 0) { + setup_grackle(hose); + } else if (is_longtrail) { + void __iomem *p = ioremap(GG2_PCI_CONFIG_BASE, 0x80000); + hose->ops = &gg2_pci_ops; + hose->cfg_data = p; + gg2_pci_config_base = p; + } else if (is_pegasos == 1) { + setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc, 0); + } else if (is_pegasos == 2) { + setup_peg2(hose, dev); + } else if (!strncmp(model, "IBM,CPC710", 10)) { + setup_indirect_pci(hose, + r.start + 0x000f8000, + r.start + 0x000f8010, + 0); + if (index == 0) { + dma = of_get_property(dev, "system-dma-base", + &len); + if (dma && len >= sizeof(*dma)) { + dma = (unsigned int *) + (((unsigned long)dma) + + len - sizeof(*dma)); + pci_dram_offset = *dma; + } + } + } else { + printk("No methods for %pOF (model %s), using RTAS\n", + dev, model); + hose->ops = &rtas_pci_ops; + } + + pci_process_bridge_OF_ranges(hose, dev, index == 0); + + /* check the first bridge for a property that we can + use to set pci_dram_offset */ + dma = of_get_property(dev, "ibm,dma-ranges", &len); + if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) { + pci_dram_offset = dma[2] - dma[3]; + printk("pci_dram_offset = %lx\n", pci_dram_offset); + } + } + of_node_put(root); + + /* + * "Temporary" fixes for PCI devices. + * -- Geert + */ + hydra_init(); /* Mac I/O */ + + pci_create_OF_bus_map(); +} + +/* SL82C105 IDE Control/Status Register */ +#define SL82C105_IDECSR 0x40 + +/* Fixup for Winbond ATA quirk, required for briq mostly because the + * 8259 is configured for level sensitive IRQ 14 and so wants the + * ATA controller to be set to fully native mode or bad things + * will happen. + */ +static void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105) +{ + u8 progif; + + /* If non-briq machines need that fixup too, please speak up */ + if (!machine_is(chrp) || _chrp_type != _CHRP_briq) + return; + + if ((sl82c105->class & 5) != 5) { + printk("W83C553: Switching SL82C105 IDE to PCI native mode\n"); + /* Enable SL82C105 PCI native IDE mode */ + pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &progif); + pci_write_config_byte(sl82c105, PCI_CLASS_PROG, progif | 0x05); + sl82c105->class |= 0x05; + /* Disable SL82C105 second port */ + pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003); + /* Clear IO BARs, they will be reassigned */ + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_0, 0); + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_1, 0); + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_2, 0); + pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_3, 0); + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + chrp_pci_fixup_winbond_ata); + +/* Pegasos2 firmware version 20040810 configures the built-in IDE controller + * in legacy mode, but sets the PCI registers to PCI native mode. + * The chip can only operate in legacy mode, so force the PCI class into legacy + * mode as well. The same fixup must be done to the class-code property in + * the IDE node /pci@80000000/ide@C,1 + */ +static void chrp_pci_fixup_vt8231_ata(struct pci_dev *viaide) +{ + u8 progif; + struct pci_dev *viaisa; + + if (!machine_is(chrp) || _chrp_type != _CHRP_Pegasos) + return; + if (viaide->irq != 14) + return; + + viaisa = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL); + if (!viaisa) + return; + dev_info(&viaide->dev, "Fixing VIA IDE, force legacy mode on\n"); + + pci_read_config_byte(viaide, PCI_CLASS_PROG, &progif); + pci_write_config_byte(viaide, PCI_CLASS_PROG, progif & ~0x5); + viaide->class &= ~0x5; + + pci_dev_put(viaisa); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, chrp_pci_fixup_vt8231_ata); diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c new file mode 100644 index 0000000000..5c4f1a9ca1 --- /dev/null +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2005 Sven Luther + * Thanks to : + * Dale Farnsworth + * Mark A. Greer + * Nicolas DET + * Benjamin Herrenschmidt + * And anyone else who helped me on this. + */ + +#include +#include +#include +#include +#include +#include +#include + +#define PEGASOS2_MARVELL_REGBASE (0xf1000000) +#define PEGASOS2_MARVELL_REGSIZE (0x00004000) +#define PEGASOS2_SRAM_BASE (0xf2000000) +#define PEGASOS2_SRAM_SIZE (256*1024) + +#define PEGASOS2_SRAM_BASE_ETH_PORT0 (PEGASOS2_SRAM_BASE) +#define PEGASOS2_SRAM_BASE_ETH_PORT1 (PEGASOS2_SRAM_BASE_ETH_PORT0 + (PEGASOS2_SRAM_SIZE / 2) ) + + +#define PEGASOS2_SRAM_RXRING_SIZE (PEGASOS2_SRAM_SIZE/4) +#define PEGASOS2_SRAM_TXRING_SIZE (PEGASOS2_SRAM_SIZE/4) + +#undef BE_VERBOSE + +static struct resource mv643xx_eth_shared_resources[] = { + [0] = { + .name = "ethernet shared base", + .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS, + .end = 0xf1000000 + MV643XX_ETH_SHARED_REGS + + MV643XX_ETH_SHARED_REGS_SIZE - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device mv643xx_eth_shared_device = { + .name = MV643XX_ETH_SHARED_NAME, + .id = 0, + .num_resources = ARRAY_SIZE(mv643xx_eth_shared_resources), + .resource = mv643xx_eth_shared_resources, +}; + +/* + * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1 + */ +static struct resource mv643xx_eth_mvmdio_resources[] = { + [0] = { + .name = "ethernet mdio base", + .start = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x4, + .end = 0xf1000000 + MV643XX_ETH_SHARED_REGS + 0x83, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device mv643xx_eth_mvmdio_device = { + .name = "orion-mdio", + .id = -1, + .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources), + .resource = mv643xx_eth_mvmdio_resources, +}; + +static struct resource mv643xx_eth_port1_resources[] = { + [0] = { + .name = "eth port1 irq", + .start = 9, + .end = 9, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct mv643xx_eth_platform_data eth_port1_pd = { + .shared = &mv643xx_eth_shared_device, + .port_number = 1, + .phy_addr = MV643XX_ETH_PHY_ADDR(7), + + .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1, + .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE, + .tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16, + + .rx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1 + PEGASOS2_SRAM_TXRING_SIZE, + .rx_sram_size = PEGASOS2_SRAM_RXRING_SIZE, + .rx_queue_size = PEGASOS2_SRAM_RXRING_SIZE/16, +}; + +static struct platform_device eth_port1_device = { + .name = MV643XX_ETH_NAME, + .id = 1, + .num_resources = ARRAY_SIZE(mv643xx_eth_port1_resources), + .resource = mv643xx_eth_port1_resources, + .dev = { + .platform_data = ð_port1_pd, + }, +}; + +static struct platform_device *mv643xx_eth_pd_devs[] __initdata = { + &mv643xx_eth_shared_device, + &mv643xx_eth_mvmdio_device, + ð_port1_device, +}; + +/***********/ +/***********/ +#define MV_READ(offset,val) { val = readl(mv643xx_reg_base + offset); } +#define MV_WRITE(offset,data) writel(data, mv643xx_reg_base + offset) + +static void __iomem *mv643xx_reg_base; + +static int __init Enable_SRAM(void) +{ + u32 ALong; + + if (mv643xx_reg_base == NULL) + mv643xx_reg_base = ioremap(PEGASOS2_MARVELL_REGBASE, + PEGASOS2_MARVELL_REGSIZE); + + if (mv643xx_reg_base == NULL) + return -ENOMEM; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: register remapped from %p to %p\n", + (void *)PEGASOS2_MARVELL_REGBASE, (void *)mv643xx_reg_base); +#endif + + MV_WRITE(MV64340_SRAM_CONFIG, 0); + + MV_WRITE(MV64340_INTEGRATED_SRAM_BASE_ADDR, PEGASOS2_SRAM_BASE >> 16); + + MV_READ(MV64340_BASE_ADDR_ENABLE, ALong); + ALong &= ~(1 << 19); + MV_WRITE(MV64340_BASE_ADDR_ENABLE, ALong); + + ALong = 0x02; + ALong |= PEGASOS2_SRAM_BASE & 0xffff0000; + MV_WRITE(MV643XX_ETH_BAR_4, ALong); + + MV_WRITE(MV643XX_ETH_SIZE_REG_4, (PEGASOS2_SRAM_SIZE-1) & 0xffff0000); + + MV_READ(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong); + ALong &= ~(1 << 4); + MV_WRITE(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong); + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: register unmapped\n"); + printk("Pegasos II/Marvell MV64361: SRAM at %p, size=%x\n", (void*) PEGASOS2_SRAM_BASE, PEGASOS2_SRAM_SIZE); +#endif + + iounmap(mv643xx_reg_base); + mv643xx_reg_base = NULL; + + return 1; +} + + +/***********/ +/***********/ +static int __init mv643xx_eth_add_pds(void) +{ + int ret = 0; + static struct pci_device_id pci_marvell_mv64360[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64360) }, + { } + }; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: init\n"); +#endif + + if (pci_dev_present(pci_marvell_mv64360)) { + ret = platform_add_devices(mv643xx_eth_pd_devs, + ARRAY_SIZE(mv643xx_eth_pd_devs)); + + if ( Enable_SRAM() < 0) + { + eth_port1_pd.tx_sram_addr = 0; + eth_port1_pd.tx_sram_size = 0; + eth_port1_pd.rx_sram_addr = 0; + eth_port1_pd.rx_sram_size = 0; + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: Can't enable the " + "SRAM\n"); +#endif + } + } + +#ifdef BE_VERBOSE + printk("Pegasos II/Marvell MV64361: init is over\n"); +#endif + + return ret; +} + +device_initcall(mv643xx_eth_add_pds); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c new file mode 100644 index 0000000000..36ee3a5056 --- /dev/null +++ b/arch/powerpc/platforms/chrp/setup.c @@ -0,0 +1,586 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + */ + +/* + * bootup setup stuff.. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "chrp.h" +#include "gg2.h" + +void rtas_indicator_progress(char *, unsigned short); + +int _chrp_type; +EXPORT_SYMBOL(_chrp_type); + +static struct mpic *chrp_mpic; + +/* Used for doing CHRP event-scans */ +DEFINE_PER_CPU(struct timer_list, heartbeat_timer); +unsigned long event_scan_interval; + +extern unsigned long loops_per_jiffy; + +/* To be replaced by RTAS when available */ +static unsigned int __iomem *briq_SPOR; + +#ifdef CONFIG_SMP +extern struct smp_ops_t chrp_smp_ops; +#endif + +static const char *gg2_memtypes[4] = { + "FPM", "SDRAM", "EDO", "BEDO" +}; +static const char *gg2_cachesizes[4] = { + "256 KB", "512 KB", "1 MB", "Reserved" +}; +static const char *gg2_cachetypes[4] = { + "Asynchronous", "Reserved", "Flow-Through Synchronous", + "Pipelined Synchronous" +}; +static const char *gg2_cachemodes[4] = { + "Disabled", "Write-Through", "Copy-Back", "Transparent Mode" +}; + +static const char *chrp_names[] = { + "Unknown", + "","","", + "Motorola", + "IBM or Longtrail", + "Genesi Pegasos", + "Total Impact Briq" +}; + +static void chrp_show_cpuinfo(struct seq_file *m) +{ + int i, sdramen; + unsigned int t; + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + + /* longtrail (goldengate) stuff */ + if (model && !strncmp(model, "IBM,LongTrail", 13)) { + /* VLSI VAS96011/12 `Golden Gate 2' */ + /* Memory banks */ + sdramen = (in_le32(gg2_pci_config_base + GG2_PCI_DRAM_CTRL) + >>31) & 1; + for (i = 0; i < (sdramen ? 4 : 6); i++) { + t = in_le32(gg2_pci_config_base+ + GG2_PCI_DRAM_BANK0+ + i*4); + if (!(t & 1)) + continue; + switch ((t>>8) & 0x1f) { + case 0x1f: + model = "4 MB"; + break; + case 0x1e: + model = "8 MB"; + break; + case 0x1c: + model = "16 MB"; + break; + case 0x18: + model = "32 MB"; + break; + case 0x10: + model = "64 MB"; + break; + case 0x00: + model = "128 MB"; + break; + default: + model = "Reserved"; + break; + } + seq_printf(m, "memory bank %d\t: %s %s\n", i, model, + gg2_memtypes[sdramen ? 1 : ((t>>1) & 3)]); + } + /* L2 cache */ + t = in_le32(gg2_pci_config_base+GG2_PCI_CC_CTRL); + seq_printf(m, "board l2\t: %s %s (%s)\n", + gg2_cachesizes[(t>>7) & 3], + gg2_cachetypes[(t>>2) & 3], + gg2_cachemodes[t & 3]); + } + of_node_put(root); +} + +/* + * Fixes for the National Semiconductor PC78308VUL SuperI/O + * + * Some versions of Open Firmware incorrectly initialize the IRQ settings + * for keyboard and mouse + */ +static inline void __init sio_write(u8 val, u8 index) +{ + outb(index, 0x15c); + outb(val, 0x15d); +} + +static inline u8 __init sio_read(u8 index) +{ + outb(index, 0x15c); + return inb(0x15d); +} + +static void __init sio_fixup_irq(const char *name, u8 device, u8 level, + u8 type) +{ + u8 level0, type0, active; + + /* select logical device */ + sio_write(device, 0x07); + active = sio_read(0x30); + level0 = sio_read(0x70); + type0 = sio_read(0x71); + if (level0 != level || type0 != type || !active) { + printk(KERN_WARNING "sio: %s irq level %d, type %d, %sactive: " + "remapping to level %d, type %d, active\n", + name, level0, type0, !active ? "in" : "", level, type); + sio_write(0x01, 0x30); + sio_write(level, 0x70); + sio_write(type, 0x71); + } +} + +static void __init sio_init(void) +{ + struct device_node *root; + const char *model; + + root = of_find_node_by_path("/"); + if (!root) + return; + + model = of_get_property(root, "model", NULL); + if (model && !strncmp(model, "IBM,LongTrail", 13)) { + /* logical device 0 (KBC/Keyboard) */ + sio_fixup_irq("keyboard", 0, 1, 2); + /* select logical device 1 (KBC/Mouse) */ + sio_fixup_irq("mouse", 1, 12, 2); + } + + of_node_put(root); +} + + +static void __init pegasos_set_l2cr(void) +{ + struct device_node *np; + + /* On Pegasos, enable the l2 cache if needed, as the OF forgets it */ + if (_chrp_type != _CHRP_Pegasos) + return; + + /* Enable L2 cache if needed */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) { + const unsigned int *l2cr = of_get_property(np, "l2cr", NULL); + if (l2cr == NULL) { + printk ("Pegasos l2cr : no cpu l2cr property found\n"); + goto out; + } + if (!((*l2cr) & 0x80000000)) { + printk ("Pegasos l2cr : L2 cache was not active, " + "activating\n"); + _set_L2CR(0); + _set_L2CR((*l2cr) | 0x80000000); + } + } +out: + of_node_put(np); +} + +static void __noreturn briq_restart(char *cmd) +{ + local_irq_disable(); + if (briq_SPOR) + out_be32(briq_SPOR, 0); + for(;;); +} + +/* + * Per default, input/output-device points to the keyboard/screen + * If no card is installed, the built-in serial port is used as a fallback. + * But unfortunately, the firmware does not connect /chosen/{stdin,stdout} + * to the built-in serial node. Instead, a /failsafe node is created. + */ +static __init void chrp_init(void) +{ + struct device_node *node; + const char *property; + + if (strstr(boot_command_line, "console=")) + return; + /* find the boot console from /chosen/stdout */ + if (!of_chosen) + return; + node = of_find_node_by_path("/"); + if (!node) + return; + property = of_get_property(node, "model", NULL); + if (!property) + goto out_put; + if (strcmp(property, "Pegasos2")) + goto out_put; + /* this is a Pegasos2 */ + property = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (!property) + goto out_put; + of_node_put(node); + node = of_find_node_by_path(property); + if (!node) + return; + if (!of_node_is_type(node, "serial")) + goto out_put; + /* + * The 9pin connector is either /failsafe + * or /pci@80000000/isa@C/serial@i2F8 + * The optional graphics card has also type 'serial' in VGA mode. + */ + if (of_node_name_eq(node, "failsafe") || of_node_name_eq(node, "serial")) + add_preferred_console("ttyS", 0, NULL); +out_put: + of_node_put(node); +} + +static void __init chrp_setup_arch(void) +{ + struct device_node *root = of_find_node_by_path("/"); + const char *machine = NULL; + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000/HZ; + + if (root) + machine = of_get_property(root, "model", NULL); + if (machine && strncmp(machine, "Pegasos", 7) == 0) { + _chrp_type = _CHRP_Pegasos; + } else if (machine && strncmp(machine, "IBM", 3) == 0) { + _chrp_type = _CHRP_IBM; + } else if (machine && strncmp(machine, "MOT", 3) == 0) { + _chrp_type = _CHRP_Motorola; + } else if (machine && strncmp(machine, "TotalImpact,BRIQ-1", 18) == 0) { + _chrp_type = _CHRP_briq; + /* Map the SPOR register on briq and change the restart hook */ + briq_SPOR = ioremap(0xff0000e8, 4); + ppc_md.restart = briq_restart; + } else { + /* Let's assume it is an IBM chrp if all else fails */ + _chrp_type = _CHRP_IBM; + } + of_node_put(root); + printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]); + + rtas_initialize(); + if (rtas_function_token(RTAS_FN_DISPLAY_CHARACTER) >= 0) + ppc_md.progress = rtas_progress; + + /* use RTAS time-of-day routines if available */ + if (rtas_function_token(RTAS_FN_GET_TIME_OF_DAY) != RTAS_UNKNOWN_SERVICE) { + ppc_md.get_boot_time = rtas_get_boot_time; + ppc_md.get_rtc_time = rtas_get_rtc_time; + ppc_md.set_rtc_time = rtas_set_rtc_time; + } + + /* On pegasos, enable the L2 cache if not already done by OF */ + pegasos_set_l2cr(); + + /* + * Fix the Super I/O configuration + */ + sio_init(); + + /* + * Print the banner, then scroll down so boot progress + * can be printed. -- Cort + */ + if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0); +} + +static void chrp_8259_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +/* + * Finds the open-pic node and sets up the mpic driver. + */ +static void __init chrp_find_openpic(void) +{ + struct device_node *np, *root; + int len, i, j; + int isu_size; + const unsigned int *iranges, *opprop = NULL; + int oplen = 0; + unsigned long opaddr; + int na = 1; + + np = of_find_node_by_type(NULL, "open-pic"); + if (np == NULL) + return; + root = of_find_node_by_path("/"); + if (root) { + opprop = of_get_property(root, "platform-open-pic", &oplen); + na = of_n_addr_cells(root); + } + if (opprop && oplen >= na * sizeof(unsigned int)) { + opaddr = opprop[na-1]; /* assume 32-bit */ + oplen /= na * sizeof(unsigned int); + } else { + struct resource r; + if (of_address_to_resource(np, 0, &r)) { + goto bail; + } + opaddr = r.start; + oplen = 0; + } + + printk(KERN_INFO "OpenPIC at %lx\n", opaddr); + + iranges = of_get_property(np, "interrupt-ranges", &len); + if (iranges == NULL) + len = 0; /* non-distributed mpic */ + else + len /= 2 * sizeof(unsigned int); + + /* + * The first pair of cells in interrupt-ranges refers to the + * IDU; subsequent pairs refer to the ISUs. + */ + if (oplen < len) { + printk(KERN_ERR "Insufficient addresses for distributed" + " OpenPIC (%d < %d)\n", oplen, len); + len = oplen; + } + + isu_size = 0; + if (len > 0 && iranges[1] != 0) { + printk(KERN_INFO "OpenPIC irqs %d..%d in IDU\n", + iranges[0], iranges[0] + iranges[1] - 1); + } + if (len > 1) + isu_size = iranges[3]; + + chrp_mpic = mpic_alloc(np, opaddr, MPIC_NO_RESET, + isu_size, 0, " MPIC "); + if (chrp_mpic == NULL) { + printk(KERN_ERR "Failed to allocate MPIC structure\n"); + goto bail; + } + j = na - 1; + for (i = 1; i < len; ++i) { + iranges += 2; + j += na; + printk(KERN_INFO "OpenPIC irqs %d..%d in ISU at %x\n", + iranges[0], iranges[0] + iranges[1] - 1, + opprop[j]); + mpic_assign_isu(chrp_mpic, i - 1, opprop[j]); + } + + mpic_init(chrp_mpic); + ppc_md.get_irq = mpic_get_irq; + bail: + of_node_put(root); + of_node_put(np); +} + +static void __init chrp_find_8259(void) +{ + struct device_node *np, *pic = NULL; + unsigned long chrp_int_ack = 0; + unsigned int cascade_irq; + + /* Look for cascade */ + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "chrp,iic")) { + pic = np; + break; + } + /* Ok, 8259 wasn't found. We need to handle the case where + * we have a pegasos that claims to be chrp but doesn't have + * a proper interrupt tree + */ + if (pic == NULL && chrp_mpic != NULL) { + printk(KERN_ERR "i8259: Not found in device-tree" + " assuming no legacy interrupts\n"); + return; + } + + /* Look for intack. In a perfect world, we would look for it on + * the ISA bus that holds the 8259 but heh... Works that way. If + * we ever see a problem, we can try to re-use the pSeries code here. + * Also, Pegasos-type platforms don't have a proper node to start + * from anyway + */ + for_each_node_by_name(np, "pci") { + const unsigned int *addrp = of_get_property(np, + "8259-interrupt-acknowledge", NULL); + + if (addrp == NULL) + continue; + chrp_int_ack = addrp[of_n_addr_cells(np)-1]; + break; + } + of_node_put(np); + if (np == NULL) + printk(KERN_WARNING "Cannot find PCI interrupt acknowledge" + " address, polling\n"); + + i8259_init(pic, chrp_int_ack); + if (ppc_md.get_irq == NULL) { + ppc_md.get_irq = i8259_irq; + irq_set_default_host(i8259_get_host()); + } + if (chrp_mpic != NULL) { + cascade_irq = irq_of_parse_and_map(pic, 0); + if (!cascade_irq) + printk(KERN_ERR "i8259: failed to map cascade irq\n"); + else + irq_set_chained_handler(cascade_irq, + chrp_8259_cascade); + } +} + +static void __init chrp_init_IRQ(void) +{ +#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) + struct device_node *kbd; +#endif + chrp_find_openpic(); + chrp_find_8259(); + +#ifdef CONFIG_SMP + /* Pegasos has no MPIC, those ops would make it crash. It might be an + * option to move setting them to after we probe the PIC though + */ + if (chrp_mpic != NULL) + smp_ops = &chrp_smp_ops; +#endif /* CONFIG_SMP */ + + if (_chrp_type == _CHRP_Pegasos) + ppc_md.get_irq = i8259_irq; + +#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON) + /* see if there is a keyboard in the device tree + with a parent of type "adb" */ + for_each_node_by_name(kbd, "keyboard") + if (of_node_is_type(kbd->parent, "adb")) + break; + of_node_put(kbd); + if (kbd) { + if (request_irq(HYDRA_INT_ADB_NMI, xmon_irq, 0, "XMON break", + NULL)) + pr_err("Failed to register XMON break interrupt\n"); + } +#endif +} + +static void __init +chrp_init2(void) +{ +#if IS_ENABLED(CONFIG_NVRAM) + chrp_nvram_init(); +#endif + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); + + if (ppc_md.progress) + ppc_md.progress(" Have fun! ", 0x7777); +} + +static int __init chrp_probe(void) +{ + const char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) + return 0; + + DMA_MODE_READ = 0x44; + DMA_MODE_WRITE = 0x48; + + pm_power_off = rtas_power_off; + + chrp_init(); + + return 1; +} + +define_machine(chrp) { + .name = "CHRP", + .probe = chrp_probe, + .setup_arch = chrp_setup_arch, + .discover_phbs = chrp_find_bridges, + .init = chrp_init2, + .show_cpuinfo = chrp_show_cpuinfo, + .init_IRQ = chrp_init_IRQ, + .restart = rtas_restart, + .halt = rtas_halt, + .time_init = chrp_time_init, + .set_rtc_time = chrp_set_rtc_time, + .get_rtc_time = chrp_get_rtc_time, + .phys_mem_access_prot = pci_phys_mem_access_prot, +}; diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c new file mode 100644 index 0000000000..ab95155647 --- /dev/null +++ b/arch/powerpc/platforms/chrp/smp.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Smp support for CHRP machines. + * + * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great + * deal of code from the sparc and intel versions. + * + * Copyright (C) 1999 Cort Dougan + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int smp_chrp_kick_cpu(int nr) +{ + *(unsigned long *)KERNELBASE = nr; + asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory"); + + return 0; +} + +static void smp_chrp_setup_cpu(int cpu_nr) +{ + mpic_setup_this_cpu(); +} + +/* CHRP with openpic */ +struct smp_ops_t chrp_smp_ops = { + .cause_nmi_ipi = NULL, + .message_pass = smp_mpic_message_pass, + .probe = smp_mpic_probe, + .kick_cpu = smp_chrp_kick_cpu, + .setup_cpu = smp_chrp_setup_cpu, + .give_timebase = rtas_give_timebase, + .take_timebase = rtas_take_timebase, +}; diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c new file mode 100644 index 0000000000..d46417e3d8 --- /dev/null +++ b/arch/powerpc/platforms/chrp/time.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + * + * Adapted for PowerPC (PReP) by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu). + * Copied and modified from arch/i386/kernel/time.c + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define NVRAM_AS0 0x74 +#define NVRAM_AS1 0x75 +#define NVRAM_DATA 0x77 + +static int nvram_as1 = NVRAM_AS1; +static int nvram_as0 = NVRAM_AS0; +static int nvram_data = NVRAM_DATA; + +long __init chrp_time_init(void) +{ + struct device_node *rtcs; + struct resource r; + int base; + + rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); + if (rtcs == NULL) + rtcs = of_find_compatible_node(NULL, "rtc", "ds1385-rtc"); + if (rtcs == NULL) + return 0; + if (of_address_to_resource(rtcs, 0, &r)) { + of_node_put(rtcs); + return 0; + } + of_node_put(rtcs); + + base = r.start; + nvram_as1 = 0; + nvram_as0 = base; + nvram_data = base + 1; + + return 0; +} + +static int chrp_cmos_clock_read(int addr) +{ + if (nvram_as1 != 0) + outb(addr>>8, nvram_as1); + outb(addr, nvram_as0); + return (inb(nvram_data)); +} + +static void chrp_cmos_clock_write(unsigned long val, int addr) +{ + if (nvram_as1 != 0) + outb(addr>>8, nvram_as1); + outb(addr, nvram_as0); + outb(val, nvram_data); + return; +} + +/* + * Set the hardware clock. -- Cort + */ +int chrp_set_rtc_time(struct rtc_time *tmarg) +{ + unsigned char save_control, save_freq_select; + struct rtc_time tm = *tmarg; + + spin_lock(&rtc_lock); + + save_control = chrp_cmos_clock_read(RTC_CONTROL); /* tell the clock it's being set */ + + chrp_cmos_clock_write((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = chrp_cmos_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ + + chrp_cmos_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + tm.tm_sec = bin2bcd(tm.tm_sec); + tm.tm_min = bin2bcd(tm.tm_min); + tm.tm_hour = bin2bcd(tm.tm_hour); + tm.tm_mon = bin2bcd(tm.tm_mon); + tm.tm_mday = bin2bcd(tm.tm_mday); + tm.tm_year = bin2bcd(tm.tm_year); + } + chrp_cmos_clock_write(tm.tm_sec,RTC_SECONDS); + chrp_cmos_clock_write(tm.tm_min,RTC_MINUTES); + chrp_cmos_clock_write(tm.tm_hour,RTC_HOURS); + chrp_cmos_clock_write(tm.tm_mon,RTC_MONTH); + chrp_cmos_clock_write(tm.tm_mday,RTC_DAY_OF_MONTH); + chrp_cmos_clock_write(tm.tm_year,RTC_YEAR); + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + chrp_cmos_clock_write(save_control, RTC_CONTROL); + chrp_cmos_clock_write(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock(&rtc_lock); + return 0; +} + +void chrp_get_rtc_time(struct rtc_time *tm) +{ + unsigned int year, mon, day, hour, min, sec; + + do { + sec = chrp_cmos_clock_read(RTC_SECONDS); + min = chrp_cmos_clock_read(RTC_MINUTES); + hour = chrp_cmos_clock_read(RTC_HOURS); + day = chrp_cmos_clock_read(RTC_DAY_OF_MONTH); + mon = chrp_cmos_clock_read(RTC_MONTH); + year = chrp_cmos_clock_read(RTC_YEAR); + } while (sec != chrp_cmos_clock_read(RTC_SECONDS)); + + if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + sec = bcd2bin(sec); + min = bcd2bin(min); + hour = bcd2bin(hour); + day = bcd2bin(day); + mon = bcd2bin(mon); + year = bcd2bin(year); + } + if (year < 70) + year += 100; + tm->tm_sec = sec; + tm->tm_min = min; + tm->tm_hour = hour; + tm->tm_mday = day; + tm->tm_mon = mon; + tm->tm_year = year; +} diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig new file mode 100644 index 0000000000..c6adff216f --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0 +config EMBEDDED6xx + bool "Embedded 6xx/7xx/7xxx-based boards" + depends on PPC_BOOK3S_32 && BROKEN_ON_SMP + +config LINKSTATION + bool "Linkstation / Kurobox(HG) from Buffalo" + depends on EMBEDDED6xx + select MPIC + select FSL_SOC + select PPC_UDBG_16550 if SERIAL_8250 + select DEFAULT_UIMAGE + imply MPC10X_BRIDGE if PCI + help + Select LINKSTATION if configuring for one of PPC- (MPC8241) + based NAS systems from Buffalo Technology. So far only + KuroboxHG has been tested. In the future classical Kurobox, + Linkstation-I HD-HLAN and HD-HGLAN versions, and PPC-based + Terastation systems should be supported too. + +config STORCENTER + bool "IOMEGA StorCenter" + depends on EMBEDDED6xx + select MPIC + select FSL_SOC + select PPC_UDBG_16550 if SERIAL_8250 + imply MPC10X_BRIDGE if PCI + help + Select STORCENTER if configuring for the iomega StorCenter + with an 8241 CPU in it. + +config PPC_HOLLY + bool "PPC750GX/CL with TSI10x bridge (Hickory/Holly)" + depends on EMBEDDED6xx + select TSI108_BRIDGE + select PPC_UDBG_16550 + help + Select PPC_HOLLY if configuring for an IBM 750GX/CL Eval + Board with TSI108/9 bridge (Hickory/Holly) + +config MVME5100 + bool "Motorola/Emerson MVME5100" + depends on EMBEDDED6xx + select MPIC + select FORCE_PCI + select PPC_INDIRECT_PCI + select PPC_I8259 + select PPC_HASH_MMU_NATIVE + select PPC_UDBG_16550 + help + This option enables support for the Motorola (now Emerson) MVME5100 + board. + +config TSI108_BRIDGE + bool + select FORCE_PCI + select MPIC + select MPIC_WEIRD + +config MPC10X_BRIDGE + bool + select PPC_INDIRECT_PCI + +config GAMECUBE_COMMON + bool + +config USBGECKO_UDBG + bool "USB Gecko udbg console for the Nintendo GameCube/Wii" + depends on GAMECUBE_COMMON + help + If you say yes to this option, support will be included for the + USB Gecko adapter as an udbg console. + The USB Gecko is a EXI to USB Serial converter that can be plugged + into a memcard slot in the Nintendo GameCube/Wii. + + This driver bypasses the EXI layer completely. + + If in doubt, say N here. + +config GAMECUBE + bool "Nintendo-GameCube" + depends on EMBEDDED6xx + select GAMECUBE_COMMON + help + Select GAMECUBE if configuring for the Nintendo GameCube. + More information at: + +config WII + bool "Nintendo-Wii" + depends on EMBEDDED6xx + select GAMECUBE_COMMON + help + Select WII if configuring for the Nintendo Wii. + More information at: diff --git a/arch/powerpc/platforms/embedded6xx/Makefile b/arch/powerpc/platforms/embedded6xx/Makefile new file mode 100644 index 0000000000..7f2a8154e5 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the 6xx/7xx/7xxxx linux kernel. +# +obj-$(CONFIG_LINKSTATION) += linkstation.o ls_uart.o +obj-$(CONFIG_STORCENTER) += storcenter.o +obj-$(CONFIG_PPC_HOLLY) += holly.o +obj-$(CONFIG_USBGECKO_UDBG) += usbgecko_udbg.o +obj-$(CONFIG_GAMECUBE_COMMON) += flipper-pic.o +obj-$(CONFIG_GAMECUBE) += gamecube.o +obj-$(CONFIG_WII) += wii.o hlwd-pic.o +obj-$(CONFIG_MVME5100) += mvme5100.o diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c new file mode 100644 index 0000000000..4d9200bdba --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/embedded6xx/flipper-pic.c + * + * Nintendo GameCube/Wii "Flipper" interrupt controller support. + * Copyright (C) 2004-2009 The GameCube Linux Team + * Copyright (C) 2007,2008,2009 Albert Herranz + */ +#define DRV_MODULE_NAME "flipper-pic" +#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "flipper-pic.h" + +#define FLIPPER_NR_IRQS 32 + +/* + * Each interrupt has a corresponding bit in both + * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers. + * + * Enabling/disabling an interrupt line involves setting/clearing + * the corresponding bit in IMR. + * Except for the RSW interrupt, all interrupts get deasserted automatically + * when the source deasserts the interrupt. + */ +#define FLIPPER_ICR 0x00 +#define FLIPPER_ICR_RSS (1<<16) /* reset switch state */ + +#define FLIPPER_IMR 0x04 + +#define FLIPPER_RESET 0x24 + + +/* + * IRQ chip hooks. + * + */ + +static void flipper_pic_mask_and_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + u32 mask = 1 << irq; + + clrbits32(io_base + FLIPPER_IMR, mask); + /* this is at least needed for RSW */ + out_be32(io_base + FLIPPER_ICR, mask); +} + +static void flipper_pic_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + /* this is at least needed for RSW */ + out_be32(io_base + FLIPPER_ICR, 1 << irq); +} + +static void flipper_pic_mask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + clrbits32(io_base + FLIPPER_IMR, 1 << irq); +} + +static void flipper_pic_unmask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + setbits32(io_base + FLIPPER_IMR, 1 << irq); +} + + +static struct irq_chip flipper_pic = { + .name = "flipper-pic", + .irq_ack = flipper_pic_ack, + .irq_mask_ack = flipper_pic_mask_and_ack, + .irq_mask = flipper_pic_mask, + .irq_unmask = flipper_pic_unmask, +}; + +/* + * IRQ host hooks. + * + */ + +static struct irq_domain *flipper_irq_host; + +static int flipper_pic_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &flipper_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops flipper_irq_domain_ops = { + .map = flipper_pic_map, +}; + +/* + * Platform hooks. + * + */ + +static void __flipper_quiesce(void __iomem *io_base) +{ + /* mask and ack all IRQs */ + out_be32(io_base + FLIPPER_IMR, 0x00000000); + out_be32(io_base + FLIPPER_ICR, 0xffffffff); +} + +static struct irq_domain * __init flipper_pic_init(struct device_node *np) +{ + struct device_node *pi; + struct irq_domain *irq_domain = NULL; + struct resource res; + void __iomem *io_base; + int retval; + + pi = of_get_parent(np); + if (!pi) { + pr_err("no parent found\n"); + goto out; + } + if (!of_device_is_compatible(pi, "nintendo,flipper-pi")) { + pr_err("unexpected parent compatible\n"); + goto out; + } + + retval = of_address_to_resource(pi, 0, &res); + if (retval) { + pr_err("no io memory range found\n"); + goto out; + } + io_base = ioremap(res.start, resource_size(&res)); + + pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, io_base); + + __flipper_quiesce(io_base); + + irq_domain = irq_domain_add_linear(np, FLIPPER_NR_IRQS, + &flipper_irq_domain_ops, io_base); + if (!irq_domain) { + pr_err("failed to allocate irq_domain\n"); + return NULL; + } + +out: + return irq_domain; +} + +unsigned int flipper_pic_get_irq(void) +{ + void __iomem *io_base = flipper_irq_host->host_data; + int irq; + u32 irq_status; + + irq_status = in_be32(io_base + FLIPPER_ICR) & + in_be32(io_base + FLIPPER_IMR); + if (irq_status == 0) + return 0; /* no more IRQs pending */ + + irq = __ffs(irq_status); + return irq_linear_revmap(flipper_irq_host, irq); +} + +/* + * Probe function. + * + */ + +void __init flipper_pic_probe(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-pic"); + BUG_ON(!np); + + flipper_irq_host = flipper_pic_init(np); + BUG_ON(!flipper_irq_host); + + irq_set_default_host(flipper_irq_host); + + of_node_put(np); +} + +/* + * Misc functions related to the flipper chipset. + * + */ + +/** + * flipper_quiesce() - quiesce flipper irq controller + * + * Mask and ack all interrupt sources. + * + */ +void flipper_quiesce(void) +{ + void __iomem *io_base = flipper_irq_host->host_data; + + __flipper_quiesce(io_base); +} + +/* + * Resets the platform. + */ +void flipper_platform_reset(void) +{ + void __iomem *io_base; + + if (flipper_irq_host && flipper_irq_host->host_data) { + io_base = flipper_irq_host->host_data; + out_8(io_base + FLIPPER_RESET, 0x00); + } +} + +/* + * Returns non-zero if the reset button is pressed. + */ +int flipper_is_reset_button_pressed(void) +{ + void __iomem *io_base; + u32 icr; + + if (flipper_irq_host && flipper_irq_host->host_data) { + io_base = flipper_irq_host->host_data; + icr = in_be32(io_base + FLIPPER_ICR); + return !(icr & FLIPPER_ICR_RSS); + } + return 0; +} + diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.h b/arch/powerpc/platforms/embedded6xx/flipper-pic.h new file mode 100644 index 0000000000..024ae70baa --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * arch/powerpc/platforms/embedded6xx/flipper-pic.h + * + * Nintendo GameCube/Wii "Flipper" interrupt controller support. + * Copyright (C) 2004-2009 The GameCube Linux Team + * Copyright (C) 2007,2008,2009 Albert Herranz + */ + +#ifndef __FLIPPER_PIC_H +#define __FLIPPER_PIC_H + +unsigned int flipper_pic_get_irq(void); +void __init flipper_pic_probe(void); + +void flipper_quiesce(void); +void flipper_platform_reset(void); +int flipper_is_reset_button_pressed(void); + +#endif diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c new file mode 100644 index 0000000000..e3b2c74647 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/embedded6xx/gamecube.c + * + * Nintendo GameCube board-specific support + * Copyright (C) 2004-2009 The GameCube Linux Team + * Copyright (C) 2007,2008,2009 Albert Herranz + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "flipper-pic.h" +#include "usbgecko_udbg.h" + + +static void __noreturn gamecube_spin(void) +{ + /* spin until power button pressed */ + for (;;) + cpu_relax(); +} + +static void __noreturn gamecube_restart(char *cmd) +{ + local_irq_disable(); + flipper_platform_reset(); + gamecube_spin(); +} + +static void gamecube_power_off(void) +{ + local_irq_disable(); + gamecube_spin(); +} + +static void __noreturn gamecube_halt(void) +{ + gamecube_restart(NULL); +} + +static int __init gamecube_probe(void) +{ + pm_power_off = gamecube_power_off; + + ug_udbg_init(); + + return 1; +} + +static void gamecube_shutdown(void) +{ + flipper_quiesce(); +} + +define_machine(gamecube) { + .name = "gamecube", + .compatible = "nintendo,gamecube", + .probe = gamecube_probe, + .restart = gamecube_restart, + .halt = gamecube_halt, + .init_IRQ = flipper_pic_probe, + .get_irq = flipper_pic_get_irq, + .progress = udbg_progress, + .machine_shutdown = gamecube_shutdown, +}; + + +static const struct of_device_id gamecube_of_bus[] = { + { .compatible = "nintendo,flipper", }, + { }, +}; + +static int __init gamecube_device_probe(void) +{ + of_platform_bus_probe(NULL, gamecube_of_bus, NULL); + return 0; +} +machine_device_initcall(gamecube, gamecube_device_probe); + diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c new file mode 100644 index 0000000000..4d2d92de30 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/embedded6xx/hlwd-pic.c + * + * Nintendo Wii "Hollywood" interrupt controller support. + * Copyright (C) 2009 The GameCube Linux Team + * Copyright (C) 2009 Albert Herranz + */ +#define DRV_MODULE_NAME "hlwd-pic" +#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "hlwd-pic.h" + +#define HLWD_NR_IRQS 32 + +/* + * Each interrupt has a corresponding bit in both + * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers. + * + * Enabling/disabling an interrupt line involves asserting/clearing + * the corresponding bit in IMR. ACK'ing a request simply involves + * asserting the corresponding bit in ICR. + */ +#define HW_BROADWAY_ICR 0x00 +#define HW_BROADWAY_IMR 0x04 +#define HW_STARLET_ICR 0x08 +#define HW_STARLET_IMR 0x0c + + +/* + * IRQ chip hooks. + * + */ + +static void hlwd_pic_mask_and_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + u32 mask = 1 << irq; + + clrbits32(io_base + HW_BROADWAY_IMR, mask); + out_be32(io_base + HW_BROADWAY_ICR, mask); +} + +static void hlwd_pic_ack(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + out_be32(io_base + HW_BROADWAY_ICR, 1 << irq); +} + +static void hlwd_pic_mask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + clrbits32(io_base + HW_BROADWAY_IMR, 1 << irq); +} + +static void hlwd_pic_unmask(struct irq_data *d) +{ + int irq = irqd_to_hwirq(d); + void __iomem *io_base = irq_data_get_irq_chip_data(d); + + setbits32(io_base + HW_BROADWAY_IMR, 1 << irq); + + /* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */ + clrbits32(io_base + HW_STARLET_IMR, 1 << irq); +} + + +static struct irq_chip hlwd_pic = { + .name = "hlwd-pic", + .irq_ack = hlwd_pic_ack, + .irq_mask_ack = hlwd_pic_mask_and_ack, + .irq_mask = hlwd_pic_mask, + .irq_unmask = hlwd_pic_unmask, +}; + +/* + * IRQ host hooks. + * + */ + +static struct irq_domain *hlwd_irq_host; + +static int hlwd_pic_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(virq, h->host_data); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &hlwd_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops hlwd_irq_domain_ops = { + .map = hlwd_pic_map, +}; + +static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) +{ + void __iomem *io_base = h->host_data; + u32 irq_status; + + irq_status = in_be32(io_base + HW_BROADWAY_ICR) & + in_be32(io_base + HW_BROADWAY_IMR); + if (irq_status == 0) + return 0; /* no more IRQs pending */ + + return __ffs(irq_status); +} + +static void hlwd_pic_irq_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); + unsigned int hwirq; + + raw_spin_lock(&desc->lock); + chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */ + raw_spin_unlock(&desc->lock); + + hwirq = __hlwd_pic_get_irq(irq_domain); + if (hwirq) + generic_handle_domain_irq(irq_domain, hwirq); + else + pr_err("spurious interrupt!\n"); + + raw_spin_lock(&desc->lock); + chip->irq_ack(&desc->irq_data); /* IRQ_LEVEL */ + if (!irqd_irq_disabled(&desc->irq_data) && chip->irq_unmask) + chip->irq_unmask(&desc->irq_data); + raw_spin_unlock(&desc->lock); +} + +/* + * Platform hooks. + * + */ + +static void __hlwd_quiesce(void __iomem *io_base) +{ + /* mask and ack all IRQs */ + out_be32(io_base + HW_BROADWAY_IMR, 0); + out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff); +} + +static struct irq_domain *__init hlwd_pic_init(struct device_node *np) +{ + struct irq_domain *irq_domain; + struct resource res; + void __iomem *io_base; + int retval; + + retval = of_address_to_resource(np, 0, &res); + if (retval) { + pr_err("no io memory range found\n"); + return NULL; + } + io_base = ioremap(res.start, resource_size(&res)); + if (!io_base) { + pr_err("ioremap failed\n"); + return NULL; + } + + pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, io_base); + + __hlwd_quiesce(io_base); + + irq_domain = irq_domain_add_linear(np, HLWD_NR_IRQS, + &hlwd_irq_domain_ops, io_base); + if (!irq_domain) { + pr_err("failed to allocate irq_domain\n"); + iounmap(io_base); + return NULL; + } + + return irq_domain; +} + +unsigned int hlwd_pic_get_irq(void) +{ + unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host); + return hwirq ? irq_linear_revmap(hlwd_irq_host, hwirq) : 0; +} + +/* + * Probe function. + * + */ + +void __init hlwd_pic_probe(void) +{ + struct irq_domain *host; + struct device_node *np; + const u32 *interrupts; + int cascade_virq; + + for_each_compatible_node(np, NULL, "nintendo,hollywood-pic") { + interrupts = of_get_property(np, "interrupts", NULL); + if (interrupts) { + host = hlwd_pic_init(np); + BUG_ON(!host); + cascade_virq = irq_of_parse_and_map(np, 0); + irq_set_handler_data(cascade_virq, host); + irq_set_chained_handler(cascade_virq, + hlwd_pic_irq_cascade); + hlwd_irq_host = host; + of_node_put(np); + break; + } + } +} + +/** + * hlwd_quiesce() - quiesce hollywood irq controller + * + * Mask and ack all interrupt sources. + * + */ +void hlwd_quiesce(void) +{ + void __iomem *io_base = hlwd_irq_host->host_data; + + __hlwd_quiesce(io_base); +} + diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h new file mode 100644 index 0000000000..c2fa42e191 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * arch/powerpc/platforms/embedded6xx/hlwd-pic.h + * + * Nintendo Wii "Hollywood" interrupt controller support. + * Copyright (C) 2009 The GameCube Linux Team + * Copyright (C) 2009 Albert Herranz + */ + +#ifndef __HLWD_PIC_H +#define __HLWD_PIC_H + +extern unsigned int hlwd_pic_get_irq(void); +void __init hlwd_pic_probe(void); +extern void hlwd_quiesce(void); + +#endif diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c new file mode 100644 index 0000000000..ce9e58ee97 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Board setup routines for the IBM 750GX/CL platform w/ TSI10x bridge + * + * Copyright 2007 IBM Corporation + * + * Stephen Winiecki + * Josh Boyer + * + * Based on code from mpc7448_hpc2.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +#define HOLLY_PCI_CFG_PHYS 0x7c000000 + +static int holly_exclude_device(struct pci_controller *hose, u_char bus, + u_char devfn) +{ + if (bus == 0 && PCI_SLOT(devfn) == 0) + return PCIBIOS_DEVICE_NOT_FOUND; + else + return PCIBIOS_SUCCESSFUL; +} + +static void __init holly_remap_bridge(void) +{ + u32 lut_val, lut_addr; + int i; + + printk(KERN_INFO "Remapping PCI bridge\n"); + + /* Re-init the PCI bridge and LUT registers to have mappings that don't + * rely on PIBS + */ + lut_addr = 0x900; + for (i = 0; i < 31; i++) { + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000201); + lut_addr += 4; + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0); + lut_addr += 4; + } + + /* Reserve the last LUT entry for PCI I/O space */ + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000241); + lut_addr += 4; + tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0); + + /* Map PCI I/O space */ + tsi108_write_reg(TSI108_PCI_PFAB_IO_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_IO, 0x1); + + /* Map PCI CFG space */ + tsi108_write_reg(TSI108_PCI_PFAB_BAR0_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_BAR0, 0x7c000000 | 0x01); + + /* We don't need MEM32 and PRM remapping so disable them */ + tsi108_write_reg(TSI108_PCI_PFAB_MEM32, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_PFM3, 0x0); + tsi108_write_reg(TSI108_PCI_PFAB_PFM4, 0x0); + + /* Set P2O_BAR0 */ + tsi108_write_reg(TSI108_PCI_P2O_BAR0_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_P2O_BAR0, 0xc0000000); + + /* Init the PCI LUTs to do no remapping */ + lut_addr = 0x500; + lut_val = 0x00000002; + + for (i = 0; i < 32; i++) { + tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, lut_val); + lut_addr += 4; + tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, 0x40000000); + lut_addr += 4; + lut_val += 0x02000000; + } + tsi108_write_reg(TSI108_PCI_P2O_PAGE_SIZES, 0x00007900); + + /* Set 64-bit PCI bus address for system memory */ + tsi108_write_reg(TSI108_PCI_P2O_BAR2_UPPER, 0x0); + tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0); +} + +static void __init holly_init_pci(void) +{ + struct device_node *np; + + if (ppc_md.progress) + ppc_md.progress("holly_setup_arch():set_bridge", 0); + + /* setup PCI host bridge */ + holly_remap_bridge(); + + np = of_find_node_by_type(NULL, "pci"); + if (np) + tsi108_setup_pci(np, HOLLY_PCI_CFG_PHYS, 1); + + of_node_put(np); + + ppc_md.pci_exclude_device = holly_exclude_device; + if (ppc_md.progress) + ppc_md.progress("tsi108: resources set", 0x100); +} + +static void __init holly_setup_arch(void) +{ + tsi108_csr_vir_base = get_vir_csrbase(); + + printk(KERN_INFO "PPC750GX/CL Platform\n"); +} + +/* + * Interrupt setup and service. Interrupts on the holly come + * from the four external INT pins, PCI interrupts are routed via + * PCI interrupt control registers, it generates internal IRQ23 + * + * Interrupt routing on the Holly Board: + * TSI108:PB_INT[0] -> CPU0:INT# + * TSI108:PB_INT[1] -> CPU0:MCP# + * TSI108:PB_INT[2] -> N/C + * TSI108:PB_INT[3] -> N/C + */ +static void __init holly_init_IRQ(void) +{ + struct mpic *mpic; +#ifdef CONFIG_PCI + unsigned int cascade_pci_irq; + struct device_node *tsi_pci; + struct device_node *cascade_node = NULL; +#endif + + mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108, + 24, 0, + "Tsi108_PIC"); + + BUG_ON(mpic == NULL); + + mpic_assign_isu(mpic, 0, mpic->paddr + 0x100); + + mpic_init(mpic); + +#ifdef CONFIG_PCI + tsi_pci = of_find_node_by_type(NULL, "pci"); + if (tsi_pci == NULL) { + printk(KERN_ERR "%s: No tsi108 pci node found !\n", __func__); + return; + } + + cascade_node = of_find_node_by_type(NULL, "pic-router"); + if (cascade_node == NULL) { + printk(KERN_ERR "%s: No tsi108 pci cascade node found !\n", __func__); + return; + } + + cascade_pci_irq = irq_of_parse_and_map(tsi_pci, 0); + pr_debug("%s: tsi108 cascade_pci_irq = 0x%x\n", __func__, (u32) cascade_pci_irq); + tsi108_pci_int_init(cascade_node); + irq_set_handler_data(cascade_pci_irq, mpic); + irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade); + + of_node_put(tsi_pci); + of_node_put(cascade_node); +#endif + /* Configure MPIC outputs to CPU0 */ + tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); +} + +static void holly_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: IBM\n"); + seq_printf(m, "machine\t\t: PPC750 GX/CL\n"); +} + +static void __noreturn holly_restart(char *cmd) +{ + __be32 __iomem *ocn_bar1 = NULL; + unsigned long bar; + struct device_node *bridge = NULL; + struct resource res; + phys_addr_t addr = 0xc0000000; + + local_irq_disable(); + + bridge = of_find_node_by_type(NULL, "tsi-bridge"); + if (bridge) { + of_address_to_resource(bridge, 0, &res); + addr = res.start; + of_node_put(bridge); + } + addr += (TSI108_PB_OFFSET + 0x414); + + ocn_bar1 = ioremap(addr, 0x4); + + /* Turn on the BOOT bit so the addresses are correctly + * routed to the HLP interface */ + bar = ioread32be(ocn_bar1); + bar |= 2; + iowrite32be(bar, ocn_bar1); + iosync(); + + /* Set SRR0 to the reset vector and turn on MSR_IP */ + mtspr(SPRN_SRR0, 0xfff00100); + mtspr(SPRN_SRR1, MSR_IP); + + /* Do an rfi to jump back to firmware. Somewhat evil, + * but it works + */ + __asm__ __volatile__("rfi" : : : "memory"); + + /* Spin until reset happens. Shouldn't really get here */ + for (;;) ; +} + +static int ppc750_machine_check_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + tsi108_clear_pci_cfg_error(); + regs_set_recoverable(regs); + regs_set_return_ip(regs, extable_fixup(entry)); + return 1; + } + return 0; +} + +define_machine(holly){ + .name = "PPC750 GX/CL TSI", + .compatible = "ibm,holly", + .setup_arch = holly_setup_arch, + .discover_phbs = holly_init_pci, + .init_IRQ = holly_init_IRQ, + .show_cpuinfo = holly_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = holly_restart, + .machine_check_exception = ppc750_machine_check_exception, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c new file mode 100644 index 0000000000..9c10aac40c --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -0,0 +1,162 @@ +/* + * Board setup routines for the Buffalo Linkstation / Kurobox Platform. + * + * Copyright (C) 2006 G. Liakhovetski (g.liakhovetski@gmx.de) + * + * Based on sandpoint.c by Mark A. Greer + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#include +#include +#include + +#include +#include +#include + +#include "mpc10x.h" + +static const struct of_device_id of_bus_ids[] __initconst = { + { .type = "soc", }, + { .compatible = "simple-bus", }, + {}, +}; + +static int __init declare_of_platform_devices(void) +{ + of_platform_bus_probe(NULL, of_bus_ids, NULL); + return 0; +} +machine_device_initcall(linkstation, declare_of_platform_devices); + +static int __init linkstation_add_bridge(struct device_node *dev) +{ +#ifdef CONFIG_PCI + int len; + struct pci_controller *hose; + const int *bus_range; + + printk("Adding PCI host bridge %pOF\n", dev); + + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) + printk(KERN_WARNING "Can't get bus-range for %pOF, assume" + " bus 0\n", dev); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); +#endif + return 0; +} + +static void __init linkstation_setup_arch(void) +{ + printk(KERN_INFO "BUFFALO Network Attached Storage Series\n"); + printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n"); +} + +static void __init linkstation_setup_pci(void) +{ + struct device_node *np; + + /* Lookup PCI host bridges */ + for_each_compatible_node(np, "pci", "mpc10x-pci") + linkstation_add_bridge(np); +} + +/* + * Interrupt setup and service. Interrupts on the linkstation come + * from the four PCI slots plus onboard 8241 devices: I2C, DUART. + */ +static void __init linkstation_init_IRQ(void) +{ + struct mpic *mpic; + + mpic = mpic_alloc(NULL, 0, 0, 4, 0, " EPIC "); + BUG_ON(mpic == NULL); + + /* PCI IRQs */ + mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200); + + /* I2C */ + mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000); + + /* ttyS0, ttyS1 */ + mpic_assign_isu(mpic, 2, mpic->paddr + 0x11100); + + mpic_init(mpic); +} + +extern void avr_uart_configure(void); +extern void avr_uart_send(const char); + +static void __noreturn linkstation_restart(char *cmd) +{ + local_irq_disable(); + + /* Reset system via AVR */ + avr_uart_configure(); + /* Send reboot command */ + avr_uart_send('C'); + + for(;;) /* Spin until reset happens */ + avr_uart_send('G'); /* "kick" */ +} + +static void __noreturn linkstation_power_off(void) +{ + local_irq_disable(); + + /* Power down system via AVR */ + avr_uart_configure(); + /* send shutdown command */ + avr_uart_send('E'); + + for(;;) /* Spin until power-off happens */ + avr_uart_send('G'); /* "kick" */ + /* NOTREACHED */ +} + +static void __noreturn linkstation_halt(void) +{ + linkstation_power_off(); + /* NOTREACHED */ +} + +static void linkstation_show_cpuinfo(struct seq_file *m) +{ + seq_printf(m, "vendor\t\t: Buffalo Technology\n"); + seq_printf(m, "machine\t\t: Linkstation I/Kurobox(HG)\n"); +} + +static int __init linkstation_probe(void) +{ + pm_power_off = linkstation_power_off; + + return 1; +} + +define_machine(linkstation){ + .name = "Buffalo Linkstation", + .compatible = "linkstation", + .probe = linkstation_probe, + .setup_arch = linkstation_setup_arch, + .discover_phbs = linkstation_setup_pci, + .init_IRQ = linkstation_init_IRQ, + .show_cpuinfo = linkstation_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = linkstation_restart, + .halt = linkstation_halt, +}; diff --git a/arch/powerpc/platforms/embedded6xx/ls_uart.c b/arch/powerpc/platforms/embedded6xx/ls_uart.c new file mode 100644 index 0000000000..6c1dbf8ae7 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/ls_uart.c @@ -0,0 +1,147 @@ +/* + * AVR power-management chip interface for the Buffalo Linkstation / + * Kurobox Platform. + * + * Author: 2006 (c) G. Liakhovetski + * g.liakhovetski@gmx.de + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc10x.h" + +static void __iomem *avr_addr; +static unsigned long avr_clock; + +static struct work_struct wd_work; + +static void wd_stop(struct work_struct *unused) +{ + const char string[] = "AAAAFFFFJJJJ>>>>VVVV>>>>ZZZZVVVVKKKK"; + int i = 0, rescue = 8; + int len = strlen(string); + + while (rescue--) { + int j; + char lsr = in_8(avr_addr + UART_LSR); + + if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) { + for (j = 0; j < 16 && i < len; j++, i++) + out_8(avr_addr + UART_TX, string[i]); + if (i == len) { + /* Read "OK" back: 4ms for the last "KKKK" + plus a couple bytes back */ + msleep(7); + printk("linkstation: disarming the AVR watchdog: "); + while (in_8(avr_addr + UART_LSR) & UART_LSR_DR) + printk("%c", in_8(avr_addr + UART_RX)); + break; + } + } + msleep(17); + } + printk("\n"); +} + +#define AVR_QUOT(clock) ((clock) + 8 * 9600) / (16 * 9600) + +void avr_uart_configure(void) +{ + unsigned char cval = UART_LCR_WLEN8; + unsigned int quot = AVR_QUOT(avr_clock); + + if (!avr_addr || !avr_clock) + return; + + out_8(avr_addr + UART_LCR, cval); /* initialise UART */ + out_8(avr_addr + UART_MCR, 0); + out_8(avr_addr + UART_IER, 0); + + cval |= UART_LCR_STOP | UART_LCR_PARITY | UART_LCR_EPAR; + + out_8(avr_addr + UART_LCR, cval); /* Set character format */ + + out_8(avr_addr + UART_LCR, cval | UART_LCR_DLAB); /* set DLAB */ + out_8(avr_addr + UART_DLL, quot & 0xff); /* LS of divisor */ + out_8(avr_addr + UART_DLM, quot >> 8); /* MS of divisor */ + out_8(avr_addr + UART_LCR, cval); /* reset DLAB */ + out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO); /* enable FIFO */ +} + +void avr_uart_send(const char c) +{ + if (!avr_addr || !avr_clock) + return; + + out_8(avr_addr + UART_TX, c); + out_8(avr_addr + UART_TX, c); + out_8(avr_addr + UART_TX, c); + out_8(avr_addr + UART_TX, c); +} + +static void __init ls_uart_init(void) +{ + local_irq_disable(); + +#ifndef CONFIG_SERIAL_8250 + out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO); /* enable FIFO */ + out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO | + UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); /* clear FIFOs */ + out_8(avr_addr + UART_FCR, 0); + out_8(avr_addr + UART_IER, 0); + + /* Clear up interrupts */ + (void) in_8(avr_addr + UART_LSR); + (void) in_8(avr_addr + UART_RX); + (void) in_8(avr_addr + UART_IIR); + (void) in_8(avr_addr + UART_MSR); +#endif + avr_uart_configure(); + + local_irq_enable(); +} + +static int __init ls_uarts_init(void) +{ + struct device_node *avr; + struct resource res; + int len, ret; + + avr = of_find_node_by_path("/soc10x/serial@80004500"); + if (!avr) + return -EINVAL; + + avr_clock = *(u32*)of_get_property(avr, "clock-frequency", &len); + if (!avr_clock) + return -EINVAL; + + ret = of_address_to_resource(avr, 0, &res); + if (ret) + return ret; + + of_node_put(avr); + + avr_addr = ioremap(res.start, 32); + if (!avr_addr) + return -EFAULT; + + ls_uart_init(); + + INIT_WORK(&wd_work, wd_stop); + schedule_work(&wd_work); + + return 0; +} + +machine_late_initcall(linkstation, ls_uarts_init); diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h new file mode 100644 index 0000000000..5ad12023e5 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h @@ -0,0 +1,159 @@ +/* + * Common routines for the Motorola SPS MPC106/8240/107 Host bridge/Mem + * ctlr/EPIC/etc. + * + * Author: Mark A. Greer + * mgreer@mvista.com + * + * 2001 (c) MontaVista, Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#ifndef __PPC_KERNEL_MPC10X_H +#define __PPC_KERNEL_MPC10X_H + +#include +#include + +/* + * The values here don't completely map everything but should work in most + * cases. + * + * MAP A (PReP Map) + * Processor: 0x80000000 - 0x807fffff -> PCI I/O: 0x00000000 - 0x007fffff + * Processor: 0xc0000000 - 0xdfffffff -> PCI MEM: 0x00000000 - 0x1fffffff + * PCI MEM: 0x80000000 -> Processor System Memory: 0x00000000 + * + * MAP B (CHRP Map) + * Processor: 0xfe000000 - 0xfebfffff -> PCI I/O: 0x00000000 - 0x00bfffff + * Processor: 0x80000000 - 0xbfffffff -> PCI MEM: 0x80000000 - 0xbfffffff + * PCI MEM: 0x00000000 -> Processor System Memory: 0x00000000 + */ + +/* + * Define the vendor/device IDs for the various bridges--should be added to + * + */ +#define MPC10X_BRIDGE_106 ((PCI_DEVICE_ID_MOTOROLA_MPC106 << 16) | \ + PCI_VENDOR_ID_MOTOROLA) +#define MPC10X_BRIDGE_8240 ((0x0003 << 16) | PCI_VENDOR_ID_MOTOROLA) +#define MPC10X_BRIDGE_107 ((0x0004 << 16) | PCI_VENDOR_ID_MOTOROLA) +#define MPC10X_BRIDGE_8245 ((0x0006 << 16) | PCI_VENDOR_ID_MOTOROLA) + +/* Define the type of map to use */ +#define MPC10X_MEM_MAP_A 1 +#define MPC10X_MEM_MAP_B 2 + +/* Map A (PReP Map) Defines */ +#define MPC10X_MAPA_CNFG_ADDR 0x80000cf8 +#define MPC10X_MAPA_CNFG_DATA 0x80000cfc + +#define MPC10X_MAPA_ISA_IO_BASE 0x80000000 +#define MPC10X_MAPA_ISA_MEM_BASE 0xc0000000 +#define MPC10X_MAPA_DRAM_OFFSET 0x80000000 + +#define MPC10X_MAPA_PCI_INTACK_ADDR 0xbffffff0 +#define MPC10X_MAPA_PCI_IO_START 0x00000000 +#define MPC10X_MAPA_PCI_IO_END (0x00800000 - 1) +#define MPC10X_MAPA_PCI_MEM_START 0x00000000 +#define MPC10X_MAPA_PCI_MEM_END (0x20000000 - 1) + +#define MPC10X_MAPA_PCI_MEM_OFFSET (MPC10X_MAPA_ISA_MEM_BASE - \ + MPC10X_MAPA_PCI_MEM_START) + +/* Map B (CHRP Map) Defines */ +#define MPC10X_MAPB_CNFG_ADDR 0xfec00000 +#define MPC10X_MAPB_CNFG_DATA 0xfee00000 + +#define MPC10X_MAPB_ISA_IO_BASE 0xfe000000 +#define MPC10X_MAPB_ISA_MEM_BASE 0x80000000 +#define MPC10X_MAPB_DRAM_OFFSET 0x00000000 + +#define MPC10X_MAPB_PCI_INTACK_ADDR 0xfef00000 +#define MPC10X_MAPB_PCI_IO_START 0x00000000 +#define MPC10X_MAPB_PCI_IO_END (0x00c00000 - 1) +#define MPC10X_MAPB_PCI_MEM_START 0x80000000 +#define MPC10X_MAPB_PCI_MEM_END (0xc0000000 - 1) + +#define MPC10X_MAPB_PCI_MEM_OFFSET (MPC10X_MAPB_ISA_MEM_BASE - \ + MPC10X_MAPB_PCI_MEM_START) + +/* Miscellaneous Configuration register offsets */ +#define MPC10X_CFG_PIR_REG 0x09 +#define MPC10X_CFG_PIR_HOST_BRIDGE 0x00 +#define MPC10X_CFG_PIR_AGENT 0x01 + +#define MPC10X_CFG_EUMBBAR 0x78 + +#define MPC10X_CFG_PICR1_REG 0xa8 +#define MPC10X_CFG_PICR1_ADDR_MAP_MASK 0x00010000 +#define MPC10X_CFG_PICR1_ADDR_MAP_A 0x00010000 +#define MPC10X_CFG_PICR1_ADDR_MAP_B 0x00000000 +#define MPC10X_CFG_PICR1_SPEC_PCI_RD 0x00000004 +#define MPC10X_CFG_PICR1_ST_GATH_EN 0x00000040 + +#define MPC10X_CFG_PICR2_REG 0xac +#define MPC10X_CFG_PICR2_COPYBACK_OPT 0x00000001 + +#define MPC10X_CFG_MAPB_OPTIONS_REG 0xe0 +#define MPC10X_CFG_MAPB_OPTIONS_CFAE 0x80 /* CPU_FD_ALIAS_EN */ +#define MPC10X_CFG_MAPB_OPTIONS_PFAE 0x40 /* PCI_FD_ALIAS_EN */ +#define MPC10X_CFG_MAPB_OPTIONS_DR 0x20 /* DLL_RESET */ +#define MPC10X_CFG_MAPB_OPTIONS_PCICH 0x08 /* PCI_COMPATIBILITY_HOLE */ +#define MPC10X_CFG_MAPB_OPTIONS_PROCCH 0x04 /* PROC_COMPATIBILITY_HOLE */ + +/* Define offsets for the memory controller registers in the config space */ +#define MPC10X_MCTLR_MEM_START_1 0x80 /* Banks 0-3 */ +#define MPC10X_MCTLR_MEM_START_2 0x84 /* Banks 4-7 */ +#define MPC10X_MCTLR_EXT_MEM_START_1 0x88 /* Banks 0-3 */ +#define MPC10X_MCTLR_EXT_MEM_START_2 0x8c /* Banks 4-7 */ + +#define MPC10X_MCTLR_MEM_END_1 0x90 /* Banks 0-3 */ +#define MPC10X_MCTLR_MEM_END_2 0x94 /* Banks 4-7 */ +#define MPC10X_MCTLR_EXT_MEM_END_1 0x98 /* Banks 0-3 */ +#define MPC10X_MCTLR_EXT_MEM_END_2 0x9c /* Banks 4-7 */ + +#define MPC10X_MCTLR_MEM_BANK_ENABLES 0xa0 + +/* Define some offset in the EUMB */ +#define MPC10X_EUMB_SIZE 0x00100000 /* Total EUMB size (1MB) */ + +#define MPC10X_EUMB_MU_OFFSET 0x00000000 /* Msg Unit reg offset */ +#define MPC10X_EUMB_MU_SIZE 0x00001000 /* Msg Unit reg size */ +#define MPC10X_EUMB_DMA_OFFSET 0x00001000 /* DMA Unit reg offset */ +#define MPC10X_EUMB_DMA_SIZE 0x00001000 /* DMA Unit reg size */ +#define MPC10X_EUMB_ATU_OFFSET 0x00002000 /* Addr xlate reg offset */ +#define MPC10X_EUMB_ATU_SIZE 0x00001000 /* Addr xlate reg size */ +#define MPC10X_EUMB_I2C_OFFSET 0x00003000 /* I2C Unit reg offset */ +#define MPC10X_EUMB_I2C_SIZE 0x00001000 /* I2C Unit reg size */ +#define MPC10X_EUMB_DUART_OFFSET 0x00004000 /* DUART Unit reg offset (8245) */ +#define MPC10X_EUMB_DUART_SIZE 0x00001000 /* DUART Unit reg size (8245) */ +#define MPC10X_EUMB_EPIC_OFFSET 0x00040000 /* EPIC offset in EUMB */ +#define MPC10X_EUMB_EPIC_SIZE 0x00030000 /* EPIC size */ +#define MPC10X_EUMB_PM_OFFSET 0x000fe000 /* Performance Monitor reg offset (8245) */ +#define MPC10X_EUMB_PM_SIZE 0x00001000 /* Performance Monitor reg size (8245) */ +#define MPC10X_EUMB_WP_OFFSET 0x000ff000 /* Data path diagnostic, watchpoint reg offset */ +#define MPC10X_EUMB_WP_SIZE 0x00001000 /* Data path diagnostic, watchpoint reg size */ + +enum ppc_sys_devices { + MPC10X_IIC1, + MPC10X_DMA0, + MPC10X_DMA1, + MPC10X_UART0, + MPC10X_UART1, + NUM_PPC_SYS_DEVS, +}; + +int mpc10x_bridge_init(struct pci_controller *hose, + uint current_map, + uint new_map, + uint phys_eumb_base); +unsigned long mpc10x_get_mem_size(uint mem_map); +int mpc10x_enable_store_gathering(struct pci_controller *hose); +int mpc10x_disable_store_gathering(struct pci_controller *hose); + +/* For MPC107 boards that use the built-in openpic */ +void mpc10x_set_openpic(void); + +#endif /* __PPC_KERNEL_MPC10X_H */ diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c new file mode 100644 index 0000000000..00bec0f051 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Board setup routines for the Motorola/Emerson MVME5100. + * + * Copyright 2013 CSC Australia Pty. Ltd. + * + * Based on earlier code by: + * + * Matt Porter, MontaVista Software Inc. + * Copyright 2001 MontaVista Software Inc. + * + * Author: Stephen Chivers + */ + +#include +#include + +#include +#include +#include +#include +#include + +#define HAWK_MPIC_SIZE 0x00040000U +#define MVME5100_PCI_MEM_OFFSET 0x00000000 + +/* Board register addresses. */ +#define BOARD_STATUS_REG 0xfef88080 +#define BOARD_MODFAIL_REG 0xfef88090 +#define BOARD_MODRST_REG 0xfef880a0 +#define BOARD_TBEN_REG 0xfef880c0 +#define BOARD_SW_READ_REG 0xfef880e0 +#define BOARD_GEO_ADDR_REG 0xfef880e8 +#define BOARD_EXT_FEATURE1_REG 0xfef880f0 +#define BOARD_EXT_FEATURE2_REG 0xfef88100 + +static phys_addr_t pci_membase; +static u_char *restart; + +static void mvme5100_8259_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +static void __init mvme5100_pic_init(void) +{ + struct mpic *mpic; + struct device_node *np; + struct device_node *cp = NULL; + unsigned int cirq; + unsigned long intack = 0; + const u32 *prop = NULL; + + np = of_find_node_by_type(NULL, "open-pic"); + if (!np) { + pr_err("Could not find open-pic node\n"); + return; + } + + mpic = mpic_alloc(np, pci_membase, 0, 16, 256, " OpenPIC "); + + BUG_ON(mpic == NULL); + of_node_put(np); + + mpic_assign_isu(mpic, 0, pci_membase + 0x10000); + + mpic_init(mpic); + + cp = of_find_compatible_node(NULL, NULL, "chrp,iic"); + if (cp == NULL) { + pr_warn("mvme5100_pic_init: couldn't find i8259\n"); + return; + } + + cirq = irq_of_parse_and_map(cp, 0); + if (!cirq) { + pr_warn("mvme5100_pic_init: no cascade interrupt?\n"); + return; + } + + np = of_find_compatible_node(NULL, "pci", "mpc10x-pci"); + if (np) { + prop = of_get_property(np, "8259-interrupt-acknowledge", NULL); + + if (prop) + intack = prop[0]; + + of_node_put(np); + } + + if (intack) + pr_debug("mvme5100_pic_init: PCI 8259 intack at 0x%016lx\n", + intack); + + i8259_init(cp, intack); + of_node_put(cp); + irq_set_chained_handler(cirq, mvme5100_8259_cascade); +} + +static int __init mvme5100_add_bridge(struct device_node *dev) +{ + const int *bus_range; + int len; + struct pci_controller *hose; + unsigned short devid; + + pr_info("Adding PCI host bridge %pOF\n", dev); + + bus_range = of_get_property(dev, "bus-range", &len); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + setup_indirect_pci(hose, 0xfe000cf8, 0xfe000cfc, 0); + + pci_process_bridge_OF_ranges(hose, dev, 1); + + early_read_config_word(hose, 0, 0, PCI_DEVICE_ID, &devid); + + if (devid != PCI_DEVICE_ID_MOTOROLA_HAWK) { + pr_err("HAWK PHB not present?\n"); + return 0; + } + + early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_1, &pci_membase); + + if (pci_membase == 0) { + pr_err("HAWK PHB mibar not correctly set?\n"); + return 0; + } + + pr_info("mvme5100_pic_init: pci_membase: %x\n", pci_membase); + + return 0; +} + +static const struct of_device_id mvme5100_of_bus_ids[] __initconst = { + { .compatible = "hawk-bridge", }, + {}, +}; + +/* + * Setup the architecture + */ +static void __init mvme5100_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mvme5100_setup_arch()", 0); + + restart = ioremap(BOARD_MODRST_REG, 4); +} + +static void __init mvme5100_setup_pci(void) +{ + struct device_node *np; + + for_each_compatible_node(np, "pci", "hawk-pci") + mvme5100_add_bridge(np); +} + +static void mvme5100_show_cpuinfo(struct seq_file *m) +{ + seq_puts(m, "Vendor\t\t: Motorola/Emerson\n"); + seq_puts(m, "Machine\t\t: MVME5100\n"); +} + +static void __noreturn mvme5100_restart(char *cmd) +{ + + local_irq_disable(); + mtmsr(mfmsr() | MSR_IP); + + out_8((u_char *) restart, 0x01); + + while (1) + ; +} + +static int __init probe_of_platform_devices(void) +{ + + of_platform_bus_probe(NULL, mvme5100_of_bus_ids, NULL); + return 0; +} + +machine_device_initcall(mvme5100, probe_of_platform_devices); + +define_machine(mvme5100) { + .name = "MVME5100", + .compatible = "MVME5100", + .setup_arch = mvme5100_setup_arch, + .discover_phbs = mvme5100_setup_pci, + .init_IRQ = mvme5100_pic_init, + .show_cpuinfo = mvme5100_show_cpuinfo, + .get_irq = mpic_get_irq, + .restart = mvme5100_restart, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c new file mode 100644 index 0000000000..e49880e8da --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -0,0 +1,121 @@ +/* + * Board setup routines for the storcenter + * + * Copyright 2007 (C) Oyvind Repvik (nail@nslu2-linux.org) + * Copyright 2007 Andy Wilcox, Jon Loeliger + * + * Based on linkstation.c by G. Liakhovetski + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of + * any kind, whether express or implied. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "mpc10x.h" + + +static const struct of_device_id storcenter_of_bus[] __initconst = { + { .name = "soc", }, + {}, +}; + +static int __init storcenter_device_probe(void) +{ + of_platform_bus_probe(NULL, storcenter_of_bus, NULL); + return 0; +} +machine_device_initcall(storcenter, storcenter_device_probe); + + +static int __init storcenter_add_bridge(struct device_node *dev) +{ +#ifdef CONFIG_PCI + int len; + struct pci_controller *hose; + const int *bus_range; + + printk("Adding PCI host bridge %pOF\n", dev); + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + + bus_range = of_get_property(dev, "bus-range", &len); + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + setup_indirect_pci(hose, MPC10X_MAPB_CNFG_ADDR, MPC10X_MAPB_CNFG_DATA, 0); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); +#endif + + return 0; +} + +static void __init storcenter_setup_arch(void) +{ + printk(KERN_INFO "IOMEGA StorCenter\n"); +} + +static void __init storcenter_setup_pci(void) +{ + struct device_node *np; + + /* Lookup PCI host bridges */ + for_each_compatible_node(np, "pci", "mpc10x-pci") + storcenter_add_bridge(np); +} + +/* + * Interrupt setup and service. Interrupts on the turbostation come + * from the four PCI slots plus onboard 8241 devices: I2C, DUART. + */ +static void __init storcenter_init_IRQ(void) +{ + struct mpic *mpic; + + mpic = mpic_alloc(NULL, 0, 0, 16, 0, " OpenPIC "); + BUG_ON(mpic == NULL); + + /* + * 16 Serial Interrupts followed by 16 Internal Interrupts. + * I2C is the second internal, so it is at 17, 0x11020. + */ + mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200); + mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000); + + mpic_init(mpic); +} + +static void __noreturn storcenter_restart(char *cmd) +{ + local_irq_disable(); + + /* Set exception prefix high - to the firmware */ + mtmsr(mfmsr() | MSR_IP); + isync(); + + /* Wait for reset to happen */ + for (;;) ; +} + +define_machine(storcenter){ + .name = "IOMEGA StorCenter", + .compatible = "iomega,storcenter", + .setup_arch = storcenter_setup_arch, + .discover_phbs = storcenter_setup_pci, + .init_IRQ = storcenter_init_IRQ, + .get_irq = mpic_get_irq, + .restart = storcenter_restart, +}; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c new file mode 100644 index 0000000000..221577f32b --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c + * + * udbg serial input/output routines for the USB Gecko adapter. + * Copyright (C) 2008-2009 The GameCube Linux Team + * Copyright (C) 2008,2009 Albert Herranz + */ + +#include + +#include + +#include +#include +#include + +#include "usbgecko_udbg.h" + + +#define EXI_CLK_32MHZ 5 + +#define EXI_CSR 0x00 +#define EXI_CSR_CLKMASK (0x7<<4) +#define EXI_CSR_CLK_32MHZ (EXI_CLK_32MHZ<<4) +#define EXI_CSR_CSMASK (0x7<<7) +#define EXI_CSR_CS_0 (0x1<<7) /* Chip Select 001 */ + +#define EXI_CR 0x0c +#define EXI_CR_TSTART (1<<0) +#define EXI_CR_WRITE (1<<2) +#define EXI_CR_READ_WRITE (2<<2) +#define EXI_CR_TLEN(len) (((len)-1)<<4) + +#define EXI_DATA 0x10 + +#define UG_READ_ATTEMPTS 100 +#define UG_WRITE_ATTEMPTS 100 + + +static void __iomem *ug_io_base; + +/* + * Performs one input/output transaction between the exi host and the usbgecko. + */ +static u32 ug_io_transaction(u32 in) +{ + u32 __iomem *csr_reg = ug_io_base + EXI_CSR; + u32 __iomem *data_reg = ug_io_base + EXI_DATA; + u32 __iomem *cr_reg = ug_io_base + EXI_CR; + u32 csr, data, cr; + + /* select */ + csr = EXI_CSR_CLK_32MHZ | EXI_CSR_CS_0; + out_be32(csr_reg, csr); + + /* read/write */ + data = in; + out_be32(data_reg, data); + cr = EXI_CR_TLEN(2) | EXI_CR_READ_WRITE | EXI_CR_TSTART; + out_be32(cr_reg, cr); + + while (in_be32(cr_reg) & EXI_CR_TSTART) + barrier(); + + /* deselect */ + out_be32(csr_reg, 0); + + /* result */ + data = in_be32(data_reg); + + return data; +} + +/* + * Returns true if an usbgecko adapter is found. + */ +static int ug_is_adapter_present(void) +{ + if (!ug_io_base) + return 0; + + return ug_io_transaction(0x90000000) == 0x04700000; +} + +/* + * Returns true if the TX fifo is ready for transmission. + */ +static int ug_is_txfifo_ready(void) +{ + return ug_io_transaction(0xc0000000) & 0x04000000; +} + +/* + * Tries to transmit a character. + * If the TX fifo is not ready the result is undefined. + */ +static void ug_raw_putc(char ch) +{ + ug_io_transaction(0xb0000000 | (ch << 20)); +} + +/* + * Transmits a character. + * It silently fails if the TX fifo is not ready after a number of retries. + */ +static void ug_putc(char ch) +{ + int count = UG_WRITE_ATTEMPTS; + + if (!ug_io_base) + return; + + if (ch == '\n') + ug_putc('\r'); + + while (!ug_is_txfifo_ready() && count--) + barrier(); + if (count >= 0) + ug_raw_putc(ch); +} + +/* + * Returns true if the RX fifo is ready for transmission. + */ +static int ug_is_rxfifo_ready(void) +{ + return ug_io_transaction(0xd0000000) & 0x04000000; +} + +/* + * Tries to receive a character. + * If a character is unavailable the function returns -1. + */ +static int ug_raw_getc(void) +{ + u32 data = ug_io_transaction(0xa0000000); + if (data & 0x08000000) + return (data >> 16) & 0xff; + else + return -1; +} + +/* + * Receives a character. + * It fails if the RX fifo is not ready after a number of retries. + */ +static int ug_getc(void) +{ + int count = UG_READ_ATTEMPTS; + + if (!ug_io_base) + return -1; + + while (!ug_is_rxfifo_ready() && count--) + barrier(); + return ug_raw_getc(); +} + +/* + * udbg functions. + * + */ + +/* + * Transmits a character. + */ +static void ug_udbg_putc(char ch) +{ + ug_putc(ch); +} + +/* + * Receives a character. Waits until a character is available. + */ +static int ug_udbg_getc(void) +{ + int ch; + + while ((ch = ug_getc()) == -1) + barrier(); + return ch; +} + +/* + * Receives a character. If a character is not available, returns -1. + */ +static int ug_udbg_getc_poll(void) +{ + if (!ug_is_rxfifo_ready()) + return -1; + return ug_getc(); +} + +/* + * Checks if a USB Gecko adapter is inserted in any memory card slot. + */ +static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base) +{ + int i; + + /* look for a usbgecko on memcard slots A and B */ + for (i = 0; i < 2; i++) { + ug_io_base = exi_io_base + 0x14 * i; + if (ug_is_adapter_present()) + break; + } + if (i == 2) + ug_io_base = NULL; + return ug_io_base; + +} + +/* + * USB Gecko udbg support initialization. + */ +void __init ug_udbg_init(void) +{ + struct device_node *np; + void __iomem *exi_io_base; + + if (ug_io_base) + udbg_printf("%s: early -> final\n", __func__); + + np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi"); + if (!np) { + udbg_printf("%s: EXI node not found\n", __func__); + goto out; + } + + exi_io_base = of_iomap(np, 0); + if (!exi_io_base) { + udbg_printf("%s: failed to setup EXI io base\n", __func__); + goto done; + } + + if (!ug_udbg_probe(exi_io_base)) { + udbg_printf("usbgecko_udbg: not found\n"); + iounmap(exi_io_base); + } else { + udbg_putc = ug_udbg_putc; + udbg_getc = ug_udbg_getc; + udbg_getc_poll = ug_udbg_getc_poll; + udbg_printf("usbgecko_udbg: ready\n"); + } + +done: + of_node_put(np); +out: + return; +} + +#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO + +static phys_addr_t __init ug_early_grab_io_addr(void) +{ +#if defined(CONFIG_GAMECUBE) + return 0x0c000000; +#elif defined(CONFIG_WII) + return 0x0d000000; +#else +#error Invalid platform for USB Gecko based early debugging. +#endif +} + +/* + * USB Gecko early debug support initialization for udbg. + */ +void __init udbg_init_usbgecko(void) +{ + void __iomem *early_debug_area; + void __iomem *exi_io_base; + + /* + * At this point we have a BAT already setup that enables I/O + * to the EXI hardware. + * + * The BAT uses a virtual address range reserved at the fixmap. + * This must match the virtual address configured in + * head_32.S:setup_usbgecko_bat(). + */ + early_debug_area = (void __iomem *)__fix_to_virt(FIX_EARLY_DEBUG_BASE); + exi_io_base = early_debug_area + 0x00006800; + + /* try to detect a USB Gecko */ + if (!ug_udbg_probe(exi_io_base)) + return; + + /* we found a USB Gecko, load udbg hooks */ + udbg_putc = ug_udbg_putc; + udbg_getc = ug_udbg_getc; + udbg_getc_poll = ug_udbg_getc_poll; + + /* + * Prepare again the same BAT for MMU_init. + * This allows udbg I/O to continue working after the MMU is + * turned on for real. + * It is safe to continue using the same virtual address as it is + * a reserved fixmap area. + */ + setbat(1, (unsigned long)early_debug_area, + ug_early_grab_io_addr(), 128*1024, PAGE_KERNEL_NCG); +} + +#endif /* CONFIG_PPC_EARLY_DEBUG_USBGECKO */ + diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h new file mode 100644 index 0000000000..bceb11911e --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h + * + * udbg serial input/output routines for the USB Gecko adapter. + * Copyright (C) 2008-2009 The GameCube Linux Team + * Copyright (C) 2008,2009 Albert Herranz + */ + +#ifndef __USBGECKO_UDBG_H +#define __USBGECKO_UDBG_H + +#ifdef CONFIG_USBGECKO_UDBG + +extern void __init ug_udbg_init(void); + +#else + +static inline void __init ug_udbg_init(void) +{ +} + +#endif /* CONFIG_USBGECKO_UDBG */ + +void __init udbg_init_usbgecko(void); + +#endif /* __USBGECKO_UDBG_H */ diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c new file mode 100644 index 0000000000..cb3be6d6e3 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/embedded6xx/wii.c + * + * Nintendo Wii board-specific support + * Copyright (C) 2008-2009 The GameCube Linux Team + * Copyright (C) 2008,2009 Albert Herranz + */ +#define DRV_MODULE_NAME "wii" +#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "flipper-pic.h" +#include "hlwd-pic.h" +#include "usbgecko_udbg.h" + +/* control block */ +#define HW_CTRL_COMPATIBLE "nintendo,hollywood-control" + +#define HW_CTRL_RESETS 0x94 +#define HW_CTRL_RESETS_SYS (1<<0) + +/* gpio */ +#define HW_GPIO_COMPATIBLE "nintendo,hollywood-gpio" + +#define HW_GPIO_BASE(idx) (idx * 0x20) +#define HW_GPIO_OUT(idx) (HW_GPIO_BASE(idx) + 0) +#define HW_GPIO_DIR(idx) (HW_GPIO_BASE(idx) + 4) +#define HW_GPIO_OWNER (HW_GPIO_BASE(1) + 0x1c) + +#define HW_GPIO_SHUTDOWN (1<<1) +#define HW_GPIO_SLOT_LED (1<<5) +#define HW_GPIO_SENSOR_BAR (1<<8) + + +static void __iomem *hw_ctrl; +static void __iomem *hw_gpio; + +static void __noreturn wii_spin(void) +{ + local_irq_disable(); + for (;;) + cpu_relax(); +} + +static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible) +{ + void __iomem *hw_regs = NULL; + struct device_node *np; + struct resource res; + int error = -ENODEV; + + np = of_find_compatible_node(NULL, NULL, compatible); + if (!np) { + pr_err("no compatible node found for %s\n", compatible); + goto out; + } + error = of_address_to_resource(np, 0, &res); + if (error) { + pr_err("no valid reg found for %pOFn\n", np); + goto out_put; + } + + hw_regs = ioremap(res.start, resource_size(&res)); + if (hw_regs) { + pr_info("%s at 0x%pa mapped to 0x%p\n", name, + &res.start, hw_regs); + } + +out_put: + of_node_put(np); +out: + return hw_regs; +} + +static void __init wii_setup_arch(void) +{ + hw_ctrl = wii_ioremap_hw_regs("hw_ctrl", HW_CTRL_COMPATIBLE); + hw_gpio = wii_ioremap_hw_regs("hw_gpio", HW_GPIO_COMPATIBLE); + if (hw_gpio) { + /* turn off the front blue led and IR light */ + clrbits32(hw_gpio + HW_GPIO_OUT(0), + HW_GPIO_SLOT_LED | HW_GPIO_SENSOR_BAR); + } +} + +static void __noreturn wii_restart(char *cmd) +{ + local_irq_disable(); + + if (hw_ctrl) { + /* clear the system reset pin to cause a reset */ + clrbits32(hw_ctrl + HW_CTRL_RESETS, HW_CTRL_RESETS_SYS); + } + wii_spin(); +} + +static void wii_power_off(void) +{ + local_irq_disable(); + + if (hw_gpio) { + /* + * set the owner of the shutdown pin to ARM, because it is + * accessed through the registers for the ARM, below + */ + clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN); + + /* make sure that the poweroff GPIO is configured as output */ + setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN); + + /* drive the poweroff GPIO high */ + setbits32(hw_gpio + HW_GPIO_OUT(1), HW_GPIO_SHUTDOWN); + } + wii_spin(); +} + +static void __noreturn wii_halt(void) +{ + if (ppc_md.restart) + ppc_md.restart(NULL); + wii_spin(); +} + +static void __init wii_pic_probe(void) +{ + flipper_pic_probe(); + hlwd_pic_probe(); +} + +static int __init wii_probe(void) +{ + pm_power_off = wii_power_off; + + ug_udbg_init(); + + return 1; +} + +static void wii_shutdown(void) +{ + hlwd_quiesce(); + flipper_quiesce(); +} + +static const struct of_device_id wii_of_bus[] = { + { .compatible = "nintendo,hollywood", }, + { }, +}; + +static int __init wii_device_probe(void) +{ + of_platform_populate(NULL, wii_of_bus, NULL, NULL); + return 0; +} +machine_device_initcall(wii, wii_device_probe); + +define_machine(wii) { + .name = "wii", + .compatible = "nintendo,wii", + .probe = wii_probe, + .setup_arch = wii_setup_arch, + .restart = wii_restart, + .halt = wii_halt, + .init_IRQ = wii_pic_probe, + .get_irq = flipper_pic_get_irq, + .progress = udbg_progress, + .machine_shutdown = wii_shutdown, +}; diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c new file mode 100644 index 0000000000..b8d37a9932 --- /dev/null +++ b/arch/powerpc/platforms/fsl_uli1575.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ULI M1575 setup code - specific to Freescale boards + * + * Copyright 2007 Freescale Semiconductor Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define ULI_PIRQA 0x08 +#define ULI_PIRQB 0x09 +#define ULI_PIRQC 0x0a +#define ULI_PIRQD 0x0b +#define ULI_PIRQE 0x0c +#define ULI_PIRQF 0x0d +#define ULI_PIRQG 0x0e + +#define ULI_8259_NONE 0x00 +#define ULI_8259_IRQ1 0x08 +#define ULI_8259_IRQ3 0x02 +#define ULI_8259_IRQ4 0x04 +#define ULI_8259_IRQ5 0x05 +#define ULI_8259_IRQ6 0x07 +#define ULI_8259_IRQ7 0x06 +#define ULI_8259_IRQ9 0x01 +#define ULI_8259_IRQ10 0x03 +#define ULI_8259_IRQ11 0x09 +#define ULI_8259_IRQ12 0x0b +#define ULI_8259_IRQ14 0x0d +#define ULI_8259_IRQ15 0x0f + +static u8 uli_pirq_to_irq[8] = { + ULI_8259_IRQ9, /* PIRQA */ + ULI_8259_IRQ10, /* PIRQB */ + ULI_8259_IRQ11, /* PIRQC */ + ULI_8259_IRQ12, /* PIRQD */ + ULI_8259_IRQ5, /* PIRQE */ + ULI_8259_IRQ6, /* PIRQF */ + ULI_8259_IRQ7, /* PIRQG */ + ULI_8259_NONE, /* PIRQH */ +}; + +static inline bool is_quirk_valid(void) +{ + return (machine_is(mpc86xx_hpcn) || + machine_is(mpc8544_ds) || + machine_is(p2020_ds) || + machine_is(mpc8572_ds)); +} + +/* Bridge */ +static void early_uli5249(struct pci_dev *dev) +{ + unsigned char temp; + + if (!is_quirk_valid()) + return; + + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + /* read/write lock */ + pci_read_config_byte(dev, 0x7c, &temp); + pci_write_config_byte(dev, 0x7c, 0x80); + + /* set as P2P bridge */ + pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01); + dev->class |= 0x1; + + /* restore lock */ + pci_write_config_byte(dev, 0x7c, temp); +} + + +static void quirk_uli1575(struct pci_dev *dev) +{ + int i; + + if (!is_quirk_valid()) + return; + + /* + * ULI1575 interrupts route setup + */ + + /* ULI1575 IRQ mapping conf register maps PIRQx to IRQn */ + for (i = 0; i < 4; i++) { + u8 val = uli_pirq_to_irq[i*2] | (uli_pirq_to_irq[i*2+1] << 4); + pci_write_config_byte(dev, 0x48 + i, val); + } + + /* USB 1.1 OHCI controller 1: dev 28, func 0 - IRQ12 */ + pci_write_config_byte(dev, 0x86, ULI_PIRQD); + + /* USB 1.1 OHCI controller 2: dev 28, func 1 - IRQ9 */ + pci_write_config_byte(dev, 0x87, ULI_PIRQA); + + /* USB 1.1 OHCI controller 3: dev 28, func 2 - IRQ10 */ + pci_write_config_byte(dev, 0x88, ULI_PIRQB); + + /* Lan controller: dev 27, func 0 - IRQ6 */ + pci_write_config_byte(dev, 0x89, ULI_PIRQF); + + /* AC97 Audio controller: dev 29, func 0 - IRQ6 */ + pci_write_config_byte(dev, 0x8a, ULI_PIRQF); + + /* Modem controller: dev 29, func 1 - IRQ6 */ + pci_write_config_byte(dev, 0x8b, ULI_PIRQF); + + /* HD Audio controller: dev 29, func 2 - IRQ6 */ + pci_write_config_byte(dev, 0x8c, ULI_PIRQF); + + /* SATA controller: dev 31, func 1 - IRQ5 */ + pci_write_config_byte(dev, 0x8d, ULI_PIRQE); + + /* SMB interrupt: dev 30, func 1 - IRQ7 */ + pci_write_config_byte(dev, 0x8e, ULI_PIRQG); + + /* PMU ACPI SCI interrupt: dev 30, func 2 - IRQ7 */ + pci_write_config_byte(dev, 0x8f, ULI_PIRQG); + + /* USB 2.0 controller: dev 28, func 3 */ + pci_write_config_byte(dev, 0x74, ULI_8259_IRQ11); + + /* Primary PATA IDE IRQ: 14 + * Secondary PATA IDE IRQ: 15 + */ + pci_write_config_byte(dev, 0x44, 0x30 | ULI_8259_IRQ14); + pci_write_config_byte(dev, 0x75, ULI_8259_IRQ15); +} + +static void quirk_final_uli1575(struct pci_dev *dev) +{ + /* Set i8259 interrupt trigger + * IRQ 3: Level + * IRQ 4: Level + * IRQ 5: Level + * IRQ 6: Level + * IRQ 7: Level + * IRQ 9: Level + * IRQ 10: Level + * IRQ 11: Level + * IRQ 12: Level + * IRQ 14: Edge + * IRQ 15: Edge + */ + if (!is_quirk_valid()) + return; + + outb(0xfa, 0x4d0); + outb(0x1e, 0x4d1); + + /* setup RTC */ + CMOS_WRITE(RTC_SET, RTC_CONTROL); + CMOS_WRITE(RTC_24H, RTC_CONTROL); + + /* ensure month, date, and week alarm fields are ignored */ + CMOS_WRITE(0, RTC_VALID); + + outb_p(0x7c, 0x72); + outb_p(RTC_ALARM_DONT_CARE, 0x73); + + outb_p(0x7d, 0x72); + outb_p(RTC_ALARM_DONT_CARE, 0x73); +} + +/* SATA */ +static void quirk_uli5288(struct pci_dev *dev) +{ + unsigned char c; + unsigned int d; + + if (!is_quirk_valid()) + return; + + /* read/write lock */ + pci_read_config_byte(dev, 0x83, &c); + pci_write_config_byte(dev, 0x83, c|0x80); + + pci_read_config_dword(dev, PCI_CLASS_REVISION, &d); + d = (d & 0xff) | (PCI_CLASS_STORAGE_SATA_AHCI << 8); + pci_write_config_dword(dev, PCI_CLASS_REVISION, d); + + /* restore lock */ + pci_write_config_byte(dev, 0x83, c); + + /* disable emulated PATA mode enabled */ + pci_read_config_byte(dev, 0x84, &c); + pci_write_config_byte(dev, 0x84, c & ~0x01); +} + +/* PATA */ +static void quirk_uli5229(struct pci_dev *dev) +{ + unsigned short temp; + + if (!is_quirk_valid()) + return; + + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE | + PCI_COMMAND_MASTER | PCI_COMMAND_IO); + + /* Enable Native IRQ 14/15 */ + pci_read_config_word(dev, 0x4a, &temp); + pci_write_config_word(dev, 0x4a, temp | 0x1000); +} + +/* We have to do a dummy read on the P2P for the RTC to work, WTF */ +static void quirk_final_uli5249(struct pci_dev *dev) +{ + int i; + u8 *dummy; + struct pci_bus *bus = dev->bus; + struct resource *res; + resource_size_t end = 0; + + for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) { + unsigned long flags = pci_resource_flags(dev, i); + if ((flags & (IORESOURCE_MEM|IORESOURCE_PREFETCH)) == IORESOURCE_MEM) + end = pci_resource_end(dev, i); + } + + pci_bus_for_each_resource(bus, res, i) { + if (res && res->flags & IORESOURCE_MEM) { + if (res->end == end) + dummy = ioremap(res->start, 0x4); + else + dummy = ioremap(res->end - 3, 0x4); + if (dummy) { + in_8(dummy); + iounmap(dummy); + } + break; + } + } +} + +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5249, quirk_final_uli5249); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x1575, quirk_final_uli1575); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); + +static void hpcd_quirk_uli1575(struct pci_dev *dev) +{ + u32 temp32; + + if (!machine_is(mpc86xx_hpcd)) + return; + + /* Disable INTx */ + pci_read_config_dword(dev, 0x48, &temp32); + pci_write_config_dword(dev, 0x48, (temp32 | 1<<26)); + + /* Enable sideband interrupt */ + pci_read_config_dword(dev, 0x90, &temp32); + pci_write_config_dword(dev, 0x90, (temp32 | 1<<22)); +} + +static void hpcd_quirk_uli5288(struct pci_dev *dev) +{ + unsigned char c; + + if (!machine_is(mpc86xx_hpcd)) + return; + + pci_read_config_byte(dev, 0x83, &c); + c |= 0x80; + pci_write_config_byte(dev, 0x83, c); + + pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01); + pci_write_config_byte(dev, PCI_CLASS_DEVICE, 0x06); + + pci_read_config_byte(dev, 0x83, &c); + c &= 0x7f; + pci_write_config_byte(dev, 0x83, c); +} + +/* + * Since 8259PIC was disabled on the board, the IDE device can not + * use the legacy IRQ, we need to let the IDE device work under + * native mode and use the interrupt line like other PCI devices. + * IRQ14 is a sideband interrupt from IDE device to CPU and we use this + * as the interrupt for IDE device. + */ +static void hpcd_quirk_uli5229(struct pci_dev *dev) +{ + unsigned char c; + + if (!machine_is(mpc86xx_hpcd)) + return; + + pci_read_config_byte(dev, 0x4b, &c); + c |= 0x10; + pci_write_config_byte(dev, 0x4b, c); +} + +/* + * SATA interrupt pin bug fix + * There's a chip bug for 5288, The interrupt pin should be 2, + * not the read only value 1, So it use INTB#, not INTA# which + * actually used by the IDE device 5229. + * As of this bug, during the PCI initialization, 5288 read the + * irq of IDE device from the device tree, this function fix this + * bug by re-assigning a correct irq to 5288. + * + */ +static void hpcd_final_uli5288(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct device_node *hosenode = hose ? hose->dn : NULL; + struct of_phandle_args oirq; + u32 laddr[3]; + + if (!machine_is(mpc86xx_hpcd)) + return; + + if (!hosenode) + return; + + oirq.np = hosenode; + oirq.args[0] = 2; + oirq.args_count = 1; + laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8); + laddr[1] = laddr[2] = 0; + of_irq_parse_raw(laddr, &oirq); + dev->irq = irq_create_of_mapping(&oirq); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, hpcd_quirk_uli5288); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, hpcd_quirk_uli5229); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, hpcd_final_uli5288); + +static int uli_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn) +{ + if (hose->dn == fsl_pci_primary && bus == (hose->first_busno + 2)) { + /* exclude Modem controller */ + if ((PCI_SLOT(devfn) == 29) && (PCI_FUNC(devfn) == 1)) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* exclude HD Audio controller */ + if ((PCI_SLOT(devfn) == 29) && (PCI_FUNC(devfn) == 2)) + return PCIBIOS_DEVICE_NOT_FOUND; + } + + return PCIBIOS_SUCCESSFUL; +} + +void __init uli_init(void) +{ + struct device_node *node; + struct device_node *pci_with_uli; + + /* See if we have a ULI under the primary */ + + node = of_find_node_by_name(NULL, "uli1575"); + while ((pci_with_uli = of_get_parent(node))) { + of_node_put(node); + node = pci_with_uli; + + if (pci_with_uli == fsl_pci_primary) { + ppc_md.pci_exclude_device = uli_exclude_device; + break; + } + } +} diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig new file mode 100644 index 0000000000..4c058cc57c --- /dev/null +++ b/arch/powerpc/platforms/maple/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MAPLE + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN + bool "Maple 970FX Evaluation Board" + select FORCE_PCI + select MPIC + select U3_DART + select MPIC_U3_HT_IRQS + select GENERIC_TBSYNC + select PPC_UDBG_16550 + select PPC_970_NAP + select PPC_64S_HASH_MMU + select PPC_HASH_MMU_NATIVE + select PPC_RTAS + select MMIO_NVRAM + select ATA_NONSTANDARD if ATA + help + This option enables support for the Maple 970FX Evaluation Board. + For more information, refer to diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/platforms/maple/Makefile new file mode 100644 index 0000000000..19f35ab828 --- /dev/null +++ b/arch/powerpc/platforms/maple/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += setup.o pci.o time.o diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h new file mode 100644 index 0000000000..4f358b55c3 --- /dev/null +++ b/arch/powerpc/platforms/maple/maple.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Declarations for maple-specific code. + * + * Maple is the name of a PPC970 evaluation board. + */ +extern int maple_set_rtc_time(struct rtc_time *tm); +extern void maple_get_rtc_time(struct rtc_time *tm); +extern time64_t maple_get_boot_time(void); +extern void maple_calibrate_decr(void); +extern void maple_pci_init(void); +extern void maple_pci_irq_fixup(struct pci_dev *dev); +extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); + +extern struct pci_controller_ops maple_pci_controller_ops; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c new file mode 100644 index 0000000000..b911b31717 --- /dev/null +++ b/arch/powerpc/platforms/maple/pci.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), + * IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "maple.h" + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node; node = node->sibling) { + const int *bus_range; + const unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = of_get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = of_get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int *bus_range; + struct property *prop; + int len; + + /* Lookup the "bus-range" property for the hose */ + prop = of_find_property(bridge, "bus-range", &len); + if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %pOF\n", + bridge); + return; + } + bus_range = prop->value; + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + + +static unsigned long u3_agp_cfa0(u8 devfn, u8 off) +{ + return (1 << (unsigned long)PCI_SLOT(devfn)) | + ((unsigned long)PCI_FUNC(devfn) << 8) | + ((unsigned long)off & 0xFCUL); +} + +static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off) +{ + return ((unsigned long)bus << 16) | + ((unsigned long)devfn << 8) | + ((unsigned long)off & 0xFCUL) | + 1UL; +} + +static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return NULL; + caddr = u3_agp_cfa0(dev_fn, offset); + } else + caddr = u3_agp_cfa1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x07; + return hose->cfg_data + offset; +} + +static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + default: + out_le32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_agp_pci_ops = +{ + .read = u3_agp_read_config, + .write = u3_agp_write_config, +}; + +static unsigned long u3_ht_cfa0(u8 devfn, u8 off) +{ + return (devfn << 8) | off; +} + +static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off) +{ + return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL; +} + +static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + if (PCI_SLOT(devfn) == 0) + return NULL; + return hose->cfg_data + u3_ht_cfa0(devfn, offset); + } else + return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset); +} + +static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset, + int len, u32 *val) +{ + volatile void __iomem *addr; + + addr = hose->cfg_addr; + addr += ((offset & ~3) << 2) + (4 - len - (offset & 3)); + + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_be16(addr); + break; + default: + *val = in_be32(addr); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset, + int len, u32 val) +{ + volatile void __iomem *addr; + + addr = hose->cfg_addr + ((offset & ~3) << 2) + (4 - len - (offset & 3)); + + if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST) + return PCIBIOS_SUCCESSFUL; + + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_be16(addr, val); + break; + default: + out_be32(addr, val); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) + return u3_ht_root_read_config(hose, offset, len, val); + + if (offset > 0xff) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) + return u3_ht_root_write_config(hose, offset, len, val); + + if (offset > 0xff) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + default: + out_le32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + .read = u3_ht_read_config, + .write = u3_ht_write_config, +}; + +static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off) +{ + return (1 << PCI_SLOT(devfn)) | + (PCI_FUNC(devfn) << 8) | + ((off >> 8) << 28) | + (off & 0xfcu); +} + +static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn, + unsigned int off) +{ + return (bus << 16) | + (devfn << 8) | + ((off >> 8) << 28) | + (off & 0xfcu) | 1u; +} + +static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, int offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) + caddr = u4_pcie_cfa0(dev_fn, offset); + else + caddr = u4_pcie_cfa1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x03; + return hose->cfg_data + offset; +} + +static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} +static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + volatile void __iomem *addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + default: + out_le32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u4_pcie_pci_ops = +{ + .read = u4_pcie_read_config, + .write = u4_pcie_write_config, +}; + +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_buses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + hose->ops = &u3_agp_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u3_agp = hose; +} + +static void __init setup_u4_pcie(struct pci_controller* hose) +{ + /* We currently only implement the "non-atomic" config space, to + * be optimised later. + */ + hose->ops = &u4_pcie_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u4_pcie = hose; +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + hose->ops = &u3_ht_pci_ops; + + /* We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->cfg_data = ioremap(0xf2000000, 0x02000000); + hose->cfg_addr = ioremap(0xf8070000, 0x1000); + + hose->first_busno = 0; + hose->last_busno = 0xef; + + u3_ht = hose; +} + +static int __init maple_add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + char* disp_name; + const int *bus_range; + int primary = 1; + + DBG("Adding PCI host bridge %pOF\n", dev); + + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n", + dev); + } + + hose = pcibios_alloc_controller(dev); + if (hose == NULL) + return -ENOMEM; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->controller_ops = maple_pci_controller_ops; + + disp_name = NULL; + if (of_device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (of_device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } else if (of_device_is_compatible(dev, "u4-pcie")) { + setup_u4_pcie(hose); + disp_name = "U4-PCIE"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", + disp_name, hose->first_busno, hose->last_busno); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + /* Check for legacy IOs */ + isa_bridge_find_early(hose); + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); + + return 0; +} + + +void maple_pci_irq_fixup(struct pci_dev *dev) +{ + DBG(" -> maple_pci_irq_fixup\n"); + + /* Fixup IRQ for PCIe host */ + if (u4_pcie != NULL && dev->bus->number == 0 && + pci_bus_to_host(dev->bus) == u4_pcie) { + printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n"); + dev->irq = irq_create_mapping(NULL, 1); + if (dev->irq) + irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); + } + + /* Hide AMD8111 IDE interrupt when in legacy mode so + * the driver calls pci_get_legacy_ide_irq() + */ + if (dev->vendor == PCI_VENDOR_ID_AMD && + dev->device == PCI_DEVICE_ID_AMD_8111_IDE && + (dev->class & 5) != 5) { + dev->irq = 0; + } + + DBG(" <- maple_pci_irq_fixup\n"); +} + +static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_controller *hose = pci_bus_to_host(bridge->bus); + struct device_node *np, *child; + + if (hose != u3_agp) + return 0; + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions hopefully. + */ + np = hose->dn; + PCI_DN(np)->busno = 0xf0; + for_each_child_of_node(np, child) + PCI_DN(child)->busno = 0xf0; + + return 0; +} + +void __init maple_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + /* Probe root PCI hosts, that is on U3 the AGP host and the + * HyperTransport host. That one is actually "kept" around + * and actually added last as it's resource management relies + * on the AGP resources to have been setup first + */ + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n"); + return; + } + for_each_child_of_node(root, np) { + if (!of_node_is_type(np, "pci") && !of_node_is_type(np, "ht")) + continue; + if ((of_device_is_compatible(np, "u4-pcie") || + of_device_is_compatible(np, "u3-agp")) && + maple_add_bridge(np) == 0) + of_node_get(np); + + if (of_device_is_compatible(np, "u3-ht")) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Now setup the HyperTransport host if we found any + */ + if (ht && maple_add_bridge(ht) != 0) + of_node_put(ht); + + ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare; + + /* Tell pci.c to not change any resource allocations. */ + pci_add_flags(PCI_PROBE_ONLY); +} + +int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) +{ + struct device_node *np; + unsigned int defirq = channel ? 15 : 14; + unsigned int irq; + + if (pdev->vendor != PCI_VENDOR_ID_AMD || + pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) + return defirq; + + np = pci_device_to_OF_node(pdev); + if (np == NULL) { + printk("Failed to locate OF node for IDE %s\n", + pci_name(pdev)); + return defirq; + } + irq = irq_of_parse_and_map(np, channel & 0x1); + if (!irq) { + printk("Failed to map onboard IDE interrupt for channel %d\n", + channel); + return defirq; + } + return irq; +} + +static void quirk_ipr_msi(struct pci_dev *dev) +{ + /* Something prevents MSIs from the IPR from working on Bimini, + * and the driver has no smarts to recover. So disable MSI + * on it for now. */ + + if (machine_is(maple)) { + dev->no_msi = 1; + dev_info(&dev->dev, "Quirk disabled MSI\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, + quirk_ipr_msi); + +struct pci_controller_ops maple_pci_controller_ops = { +}; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c new file mode 100644 index 0000000000..f329a03edf --- /dev/null +++ b/arch/powerpc/platforms/maple/setup.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Maple (970 eval board) setup code + * + * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), + * IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "maple.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +static unsigned long maple_find_nvram_base(void) +{ + struct device_node *rtcs; + unsigned long result = 0; + + /* find NVRAM device */ + rtcs = of_find_compatible_node(NULL, "nvram", "AMD8111"); + if (rtcs) { + struct resource r; + if (of_address_to_resource(rtcs, 0, &r)) { + printk(KERN_EMERG "Maple: Unable to translate NVRAM" + " address\n"); + goto bail; + } + if (!(r.flags & IORESOURCE_IO)) { + printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n"); + goto bail; + } + result = r.start; + } else + printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); + bail: + of_node_put(rtcs); + return result; +} + +static void __noreturn maple_restart(char *cmd) +{ + unsigned int maple_nvram_base; + const unsigned int *maple_nvram_offset, *maple_nvram_command; + struct device_node *sp; + + maple_nvram_base = maple_find_nvram_base(); + if (maple_nvram_base == 0) + goto fail; + + /* find service processor device */ + sp = of_find_node_by_name(NULL, "service-processor"); + if (!sp) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + goto fail; + } + maple_nvram_offset = of_get_property(sp, "restart-addr", NULL); + maple_nvram_command = of_get_property(sp, "restart-value", NULL); + of_node_put(sp); + + /* send command */ + outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); + for (;;) ; + fail: + printk(KERN_EMERG "Maple: Manual Restart Required\n"); + for (;;) ; +} + +static void __noreturn maple_power_off(void) +{ + unsigned int maple_nvram_base; + const unsigned int *maple_nvram_offset, *maple_nvram_command; + struct device_node *sp; + + maple_nvram_base = maple_find_nvram_base(); + if (maple_nvram_base == 0) + goto fail; + + /* find service processor device */ + sp = of_find_node_by_name(NULL, "service-processor"); + if (!sp) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + goto fail; + } + maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL); + maple_nvram_command = of_get_property(sp, "power-off-value", NULL); + of_node_put(sp); + + /* send command */ + outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); + for (;;) ; + fail: + printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); + for (;;) ; +} + +static void __noreturn maple_halt(void) +{ + maple_power_off(); +} + +#ifdef CONFIG_SMP +static struct smp_ops_t maple_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .kick_cpu = smp_generic_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, + .give_timebase = smp_generic_give_timebase, + .take_timebase = smp_generic_take_timebase, +}; +#endif /* CONFIG_SMP */ + +static void __init maple_use_rtas_reboot_and_halt_if_present(void) +{ + if (rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) && + rtas_function_implemented(RTAS_FN_POWER_OFF)) { + ppc_md.restart = rtas_restart; + pm_power_off = rtas_power_off; + ppc_md.halt = rtas_halt; + } +} + +static void __init maple_setup_arch(void) +{ + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + /* Setup SMP callback */ +#ifdef CONFIG_SMP + smp_ops = &maple_smp_ops; +#endif + maple_use_rtas_reboot_and_halt_if_present(); + + printk(KERN_DEBUG "Using native/NAP idle loop\n"); + + mmio_nvram_init(); +} + +/* + * This is almost identical to pSeries and CHRP. We need to make that + * code generic at one point, with appropriate bits in the device-tree to + * identify the presence of an HT APIC + */ +static void __init maple_init_IRQ(void) +{ + struct device_node *root, *np, *mpic_node = NULL; + const unsigned int *opprop; + unsigned long openpic_addr = 0; + int naddr, n, i, opplen, has_isus = 0; + struct mpic *mpic; + unsigned int flags = 0; + + /* Locate MPIC in the device-tree. Note that there is a bug + * in Maple device-tree where the type of the controller is + * open-pic and not interrupt-controller + */ + + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "open-pic")) { + mpic_node = np; + break; + } + if (mpic_node == NULL) + for_each_node_by_type(np, "open-pic") { + mpic_node = np; + break; + } + if (mpic_node == NULL) { + printk(KERN_ERR + "Failed to locate the MPIC interrupt controller\n"); + return; + } + + /* Find address list in /platform-open-pic */ + root = of_find_node_by_path("/"); + naddr = of_n_addr_cells(root); + opprop = of_get_property(root, "platform-open-pic", &opplen); + if (opprop) { + openpic_addr = of_read_number(opprop, naddr); + has_isus = (opplen > naddr); + printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", + openpic_addr, has_isus); + } + + BUG_ON(openpic_addr == 0); + + /* Check for a big endian MPIC */ + if (of_property_read_bool(np, "big-endian")) + flags |= MPIC_BIG_ENDIAN; + + /* XXX Maple specific bits */ + flags |= MPIC_U3_HT_IRQS; + /* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */ + flags |= MPIC_BIG_ENDIAN; + + /* Setup the openpic driver. More device-tree junks, we hard code no + * ISUs for now. I'll have to revisit some stuffs with the folks doing + * the firmware for those + */ + mpic = mpic_alloc(mpic_node, openpic_addr, flags, + /*has_isus ? 16 :*/ 0, 0, " MPIC "); + BUG_ON(mpic == NULL); + + /* Add ISUs */ + opplen /= sizeof(u32); + for (n = 0, i = naddr; i < opplen; i += naddr, n++) { + unsigned long isuaddr = of_read_number(opprop + i, naddr); + mpic_assign_isu(mpic, n, isuaddr); + } + + /* All ISUs are setup, complete initialization */ + mpic_init(mpic); + ppc_md.get_irq = mpic_get_irq; + of_node_put(mpic_node); + of_node_put(root); +} + +static void __init maple_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init maple_probe(void) +{ + if (!of_machine_is_compatible("Momentum,Maple") && + !of_machine_is_compatible("Momentum,Apache")) + return 0; + + pm_power_off = maple_power_off; + + iommu_init_early_dart(&maple_pci_controller_ops); + + return 1; +} + +#ifdef CONFIG_EDAC +/* + * Register a platform device for CPC925 memory controller on + * all boards with U3H (CPC925) bridge. + */ +static int __init maple_cpc925_edac_setup(void) +{ + struct platform_device *pdev; + struct device_node *np = NULL; + struct resource r; + int ret; + volatile void __iomem *mem; + u32 rev; + + np = of_find_node_by_type(NULL, "memory-controller"); + if (!np) { + printk(KERN_ERR "%s: Unable to find memory-controller node\n", + __func__); + return -ENODEV; + } + + ret = of_address_to_resource(np, 0, &r); + of_node_put(np); + + if (ret < 0) { + printk(KERN_ERR "%s: Unable to get memory-controller reg\n", + __func__); + return -ENODEV; + } + + mem = ioremap(r.start, resource_size(&r)); + if (!mem) { + printk(KERN_ERR "%s: Unable to map memory-controller memory\n", + __func__); + return -ENOMEM; + } + + rev = __raw_readl(mem); + iounmap(mem); + + if (rev < 0x34 || rev > 0x3f) { /* U3H */ + printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n", + __func__, rev); + return 0; + } + + pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + printk(KERN_INFO "%s: CPC925 platform device created\n", __func__); + + return 0; +} +machine_device_initcall(maple, maple_cpc925_edac_setup); +#endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .discover_phbs = maple_pci_init, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .progress = maple_progress, + .power_save = power4_idle, +}; diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c new file mode 100644 index 0000000000..91606411d2 --- /dev/null +++ b/arch/powerpc/platforms/maple/time.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), + * IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "maple.h" + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +static int maple_rtc_addr; + +static int maple_clock_read(int addr) +{ + outb_p(addr, maple_rtc_addr); + return inb_p(maple_rtc_addr+1); +} + +static void maple_clock_write(unsigned long val, int addr) +{ + outb_p(addr, maple_rtc_addr); + outb_p(val, maple_rtc_addr+1); +} + +void maple_get_rtc_time(struct rtc_time *tm) +{ + do { + tm->tm_sec = maple_clock_read(RTC_SECONDS); + tm->tm_min = maple_clock_read(RTC_MINUTES); + tm->tm_hour = maple_clock_read(RTC_HOURS); + tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); + tm->tm_mon = maple_clock_read(RTC_MONTH); + tm->tm_year = maple_clock_read(RTC_YEAR); + } while (tm->tm_sec != maple_clock_read(RTC_SECONDS)); + + if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) + || RTC_ALWAYS_BCD) { + tm->tm_sec = bcd2bin(tm->tm_sec); + tm->tm_min = bcd2bin(tm->tm_min); + tm->tm_hour = bcd2bin(tm->tm_hour); + tm->tm_mday = bcd2bin(tm->tm_mday); + tm->tm_mon = bcd2bin(tm->tm_mon); + tm->tm_year = bcd2bin(tm->tm_year); + } + if ((tm->tm_year + 1900) < 1970) + tm->tm_year += 100; + + tm->tm_wday = -1; +} + +int maple_set_rtc_time(struct rtc_time *tm) +{ + unsigned char save_control, save_freq_select; + int sec, min, hour, mon, mday, year; + + spin_lock(&rtc_lock); + + save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */ + + maple_clock_write((save_control|RTC_SET), RTC_CONTROL); + + save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ + + maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + sec = tm->tm_sec; + min = tm->tm_min; + hour = tm->tm_hour; + mon = tm->tm_mon; + mday = tm->tm_mday; + year = tm->tm_year; + + if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + sec = bin2bcd(sec); + min = bin2bcd(min); + hour = bin2bcd(hour); + mon = bin2bcd(mon); + mday = bin2bcd(mday); + year = bin2bcd(year); + } + maple_clock_write(sec, RTC_SECONDS); + maple_clock_write(min, RTC_MINUTES); + maple_clock_write(hour, RTC_HOURS); + maple_clock_write(mon, RTC_MONTH); + maple_clock_write(mday, RTC_DAY_OF_MONTH); + maple_clock_write(year, RTC_YEAR); + + /* The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ... -- Markus Kuhn + */ + maple_clock_write(save_control, RTC_CONTROL); + maple_clock_write(save_freq_select, RTC_FREQ_SELECT); + + spin_unlock(&rtc_lock); + + return 0; +} + +static struct resource rtc_iores = { + .name = "rtc", + .flags = IORESOURCE_IO | IORESOURCE_BUSY, +}; + +time64_t __init maple_get_boot_time(void) +{ + struct rtc_time tm; + struct device_node *rtcs; + + rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); + if (rtcs) { + struct resource r; + if (of_address_to_resource(rtcs, 0, &r)) { + printk(KERN_EMERG "Maple: Unable to translate RTC" + " address\n"); + goto bail; + } + if (!(r.flags & IORESOURCE_IO)) { + printk(KERN_EMERG "Maple: RTC address isn't PIO!\n"); + goto bail; + } + maple_rtc_addr = r.start; + printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n", + maple_rtc_addr); + } + bail: + of_node_put(rtcs); + if (maple_rtc_addr == 0) { + maple_rtc_addr = RTC_PORT(0); /* legacy address */ + printk(KERN_INFO "Maple: No device node for RTC, assuming " + "legacy address (0x%x)\n", maple_rtc_addr); + } + + rtc_iores.start = maple_rtc_addr; + rtc_iores.end = maple_rtc_addr + 7; + request_resource(&ioport_resource, &rtc_iores); + + maple_get_rtc_time(&tm); + return rtc_tm_to_time64(&tm); +} + diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig new file mode 100644 index 0000000000..6af443a1db --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MICROWATT + depends on PPC_BOOK3S_64 && !SMP + bool "Microwatt SoC platform" + select PPC_XICS + select PPC_ICS_NATIVE + select PPC_ICP_NATIVE + select PPC_UDBG_16550 + help + This option enables support for FPGA-based Microwatt implementations. + diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile new file mode 100644 index 0000000000..116d6d3ad3 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Makefile @@ -0,0 +1 @@ +obj-y += setup.o rng.o diff --git a/arch/powerpc/platforms/microwatt/microwatt.h b/arch/powerpc/platforms/microwatt/microwatt.h new file mode 100644 index 0000000000..335417e95e --- /dev/null +++ b/arch/powerpc/platforms/microwatt/microwatt.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MICROWATT_H +#define _MICROWATT_H + +void microwatt_rng_init(void); + +#endif /* _MICROWATT_H */ diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c new file mode 100644 index 0000000000..8ece87d005 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/powerpc/platforms/powernv/rng.c, which is: + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +#define pr_fmt(fmt) "microwatt-rng: " fmt + +#include +#include +#include +#include +#include +#include "microwatt.h" + +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul + +static int microwatt_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +void __init microwatt_rng_init(void) +{ + unsigned long val; + int i; + + for (i = 0; i < 10; i++) { + if (microwatt_get_random_darn(&val)) { + ppc_md.get_random_seed = microwatt_get_random_darn; + return; + } + } +} diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c new file mode 100644 index 0000000000..5e1c099717 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -0,0 +1,43 @@ +/* + * Microwatt FPGA-based SoC platform setup code. + * + * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "microwatt.h" + +static void __init microwatt_init_IRQ(void) +{ + xics_init(); +} + +static int __init microwatt_populate(void) +{ + return of_platform_default_populate(NULL, NULL, NULL); +} +machine_arch_initcall(microwatt, microwatt_populate); + +static void __init microwatt_setup_arch(void) +{ + microwatt_rng_init(); +} + +define_machine(microwatt) { + .name = "microwatt", + .compatible = "microwatt-soc", + .init_IRQ = microwatt_init_IRQ, + .setup_arch = microwatt_setup_arch, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig new file mode 100644 index 0000000000..85ae18ddd9 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/Kconfig @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_PASEMI + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN + bool "PA Semi SoC-based platforms" + select MPIC + select FORCE_PCI + select PPC_UDBG_16550 + select PPC_64S_HASH_MMU + select PPC_HASH_MMU_NATIVE + select MPIC_BROKEN_REGREAD + help + This option enables support for PA Semi's PWRficient line + of SoC processors, including PA6T-1682M + +menu "PA Semi PWRficient options" + depends on PPC_PASEMI + +config PPC_PASEMI_NEMO + bool "Nemo motherboard Support" + depends on PPC_PASEMI + select PPC_I8259 + help + This option enables support for the 'Nemo' motherboard + used in A-Eons's Amigaone X1000. This consists of some + device tree patches and workarounds for the SB600 South + Bridge that provides SATA/USB/Audio. + +config PPC_PASEMI_IOMMU + bool "PA Semi IOMMU support" + depends on PPC_PASEMI + help + IOMMU support for PA Semi PWRficient + +config PPC_PASEMI_IOMMU_DMA_FORCE + bool "Force DMA engine to use IOMMU" + depends on PPC_PASEMI_IOMMU + help + This option forces the use of the IOMMU also for the + DMA engine. Otherwise the kernel will use it only when + running under a hypervisor. + + If in doubt, say "N". + +config PPC_PASEMI_MDIO + depends on PHYLIB + tristate "MDIO support via GPIO" + default y + help + Driver for MDIO via GPIO on PWRficient platforms + +endmenu diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile new file mode 100644 index 0000000000..d2ce954a50 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o +obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o +obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c new file mode 100644 index 0000000000..1be1f18f6f --- /dev/null +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Common functions for DMA access on PA Semi PWRficient + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define MAX_TXCH 64 +#define MAX_RXCH 64 +#define MAX_FLAGS 64 +#define MAX_FUN 8 + +static struct pasdma_status *dma_status; + +static void __iomem *iob_regs; +static void __iomem *mac_regs[6]; +static void __iomem *dma_regs; + +static int base_hw_irq; + +static int num_txch, num_rxch; + +static struct pci_dev *dma_pdev; + +/* Bitmaps to handle allocation of channels */ + +static DECLARE_BITMAP(txch_free, MAX_TXCH); +static DECLARE_BITMAP(rxch_free, MAX_RXCH); +static DECLARE_BITMAP(flags_free, MAX_FLAGS); +static DECLARE_BITMAP(fun_free, MAX_FUN); + +/* pasemi_read_iob_reg - read IOB register + * @reg: Register to read (offset into PCI CFG space) + */ +unsigned int pasemi_read_iob_reg(unsigned int reg) +{ + return in_le32(iob_regs+reg); +} +EXPORT_SYMBOL(pasemi_read_iob_reg); + +/* pasemi_write_iob_reg - write IOB register + * @reg: Register to write to (offset into PCI CFG space) + * @val: Value to write + */ +void pasemi_write_iob_reg(unsigned int reg, unsigned int val) +{ + out_le32(iob_regs+reg, val); +} +EXPORT_SYMBOL(pasemi_write_iob_reg); + +/* pasemi_read_mac_reg - read MAC register + * @intf: MAC interface + * @reg: Register to read (offset into PCI CFG space) + */ +unsigned int pasemi_read_mac_reg(int intf, unsigned int reg) +{ + return in_le32(mac_regs[intf]+reg); +} +EXPORT_SYMBOL(pasemi_read_mac_reg); + +/* pasemi_write_mac_reg - write MAC register + * @intf: MAC interface + * @reg: Register to write to (offset into PCI CFG space) + * @val: Value to write + */ +void pasemi_write_mac_reg(int intf, unsigned int reg, unsigned int val) +{ + out_le32(mac_regs[intf]+reg, val); +} +EXPORT_SYMBOL(pasemi_write_mac_reg); + +/* pasemi_read_dma_reg - read DMA register + * @reg: Register to read (offset into PCI CFG space) + */ +unsigned int pasemi_read_dma_reg(unsigned int reg) +{ + return in_le32(dma_regs+reg); +} +EXPORT_SYMBOL(pasemi_read_dma_reg); + +/* pasemi_write_dma_reg - write DMA register + * @reg: Register to write to (offset into PCI CFG space) + * @val: Value to write + */ +void pasemi_write_dma_reg(unsigned int reg, unsigned int val) +{ + out_le32(dma_regs+reg, val); +} +EXPORT_SYMBOL(pasemi_write_dma_reg); + +static int pasemi_alloc_tx_chan(enum pasemi_dmachan_type type) +{ + int bit; + int start, limit; + + switch (type & (TXCHAN_EVT0|TXCHAN_EVT1)) { + case TXCHAN_EVT0: + start = 0; + limit = 10; + break; + case TXCHAN_EVT1: + start = 10; + limit = MAX_TXCH; + break; + default: + start = 0; + limit = MAX_TXCH; + break; + } +retry: + bit = find_next_bit(txch_free, MAX_TXCH, start); + if (bit >= limit) + return -ENOSPC; + if (!test_and_clear_bit(bit, txch_free)) + goto retry; + + return bit; +} + +static void pasemi_free_tx_chan(int chan) +{ + BUG_ON(test_bit(chan, txch_free)); + set_bit(chan, txch_free); +} + +static int pasemi_alloc_rx_chan(void) +{ + int bit; +retry: + bit = find_first_bit(rxch_free, MAX_RXCH); + if (bit >= MAX_TXCH) + return -ENOSPC; + if (!test_and_clear_bit(bit, rxch_free)) + goto retry; + + return bit; +} + +static void pasemi_free_rx_chan(int chan) +{ + BUG_ON(test_bit(chan, rxch_free)); + set_bit(chan, rxch_free); +} + +/* pasemi_dma_alloc_chan - Allocate a DMA channel + * @type: Type of channel to allocate + * @total_size: Total size of structure to allocate (to allow for more + * room behind the structure to be used by the client) + * @offset: Offset in bytes from start of the total structure to the beginning + * of struct pasemi_dmachan. Needed when struct pasemi_dmachan is + * not the first member of the client structure. + * + * pasemi_dma_alloc_chan allocates a DMA channel for use by a client. The + * type argument specifies whether it's a RX or TX channel, and in the case + * of TX channels which group it needs to belong to (if any). + * + * Returns a pointer to the total structure allocated on success, NULL + * on failure. + */ +void *pasemi_dma_alloc_chan(enum pasemi_dmachan_type type, + int total_size, int offset) +{ + void *buf; + struct pasemi_dmachan *chan; + int chno; + + BUG_ON(total_size < sizeof(struct pasemi_dmachan)); + + buf = kzalloc(total_size, GFP_KERNEL); + + if (!buf) + return NULL; + chan = buf + offset; + + chan->priv = buf; + + switch (type & (TXCHAN|RXCHAN)) { + case RXCHAN: + chno = pasemi_alloc_rx_chan(); + chan->chno = chno; + chan->irq = irq_create_mapping(NULL, + base_hw_irq + num_txch + chno); + chan->status = &dma_status->rx_sta[chno]; + break; + case TXCHAN: + chno = pasemi_alloc_tx_chan(type); + chan->chno = chno; + chan->irq = irq_create_mapping(NULL, base_hw_irq + chno); + chan->status = &dma_status->tx_sta[chno]; + break; + } + + chan->chan_type = type; + + return chan; +} +EXPORT_SYMBOL(pasemi_dma_alloc_chan); + +/* pasemi_dma_free_chan - Free a previously allocated channel + * @chan: Channel to free + * + * Frees a previously allocated channel. It will also deallocate any + * descriptor ring associated with the channel, if allocated. + */ +void pasemi_dma_free_chan(struct pasemi_dmachan *chan) +{ + if (chan->ring_virt) + pasemi_dma_free_ring(chan); + + switch (chan->chan_type & (RXCHAN|TXCHAN)) { + case RXCHAN: + pasemi_free_rx_chan(chan->chno); + break; + case TXCHAN: + pasemi_free_tx_chan(chan->chno); + break; + } + + kfree(chan->priv); +} +EXPORT_SYMBOL(pasemi_dma_free_chan); + +/* pasemi_dma_alloc_ring - Allocate descriptor ring for a channel + * @chan: Channel for which to allocate + * @ring_size: Ring size in 64-bit (8-byte) words + * + * Allocate a descriptor ring for a channel. Returns 0 on success, errno + * on failure. The passed in struct pasemi_dmachan is updated with the + * virtual and DMA addresses of the ring. + */ +int pasemi_dma_alloc_ring(struct pasemi_dmachan *chan, int ring_size) +{ + BUG_ON(chan->ring_virt); + + chan->ring_size = ring_size; + + chan->ring_virt = dma_alloc_coherent(&dma_pdev->dev, + ring_size * sizeof(u64), + &chan->ring_dma, GFP_KERNEL); + + if (!chan->ring_virt) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL(pasemi_dma_alloc_ring); + +/* pasemi_dma_free_ring - Free an allocated descriptor ring for a channel + * @chan: Channel for which to free the descriptor ring + * + * Frees a previously allocated descriptor ring for a channel. + */ +void pasemi_dma_free_ring(struct pasemi_dmachan *chan) +{ + BUG_ON(!chan->ring_virt); + + dma_free_coherent(&dma_pdev->dev, chan->ring_size * sizeof(u64), + chan->ring_virt, chan->ring_dma); + chan->ring_virt = NULL; + chan->ring_size = 0; + chan->ring_dma = 0; +} +EXPORT_SYMBOL(pasemi_dma_free_ring); + +/* pasemi_dma_start_chan - Start a DMA channel + * @chan: Channel to start + * @cmdsta: Additional CCMDSTA/TCMDSTA bits to write + * + * Enables (starts) a DMA channel with optional additional arguments. + */ +void pasemi_dma_start_chan(const struct pasemi_dmachan *chan, const u32 cmdsta) +{ + if (chan->chan_type == RXCHAN) + pasemi_write_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(chan->chno), + cmdsta | PAS_DMA_RXCHAN_CCMDSTA_EN); + else + pasemi_write_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(chan->chno), + cmdsta | PAS_DMA_TXCHAN_TCMDSTA_EN); +} +EXPORT_SYMBOL(pasemi_dma_start_chan); + +/* pasemi_dma_stop_chan - Stop a DMA channel + * @chan: Channel to stop + * + * Stops (disables) a DMA channel. This is done by setting the ST bit in the + * CMDSTA register and waiting on the ACT (active) bit to clear, then + * finally disabling the whole channel. + * + * This function will only try for a short while for the channel to stop, if + * it doesn't it will return failure. + * + * Returns 1 on success, 0 on failure. + */ +#define MAX_RETRIES 5000 +int pasemi_dma_stop_chan(const struct pasemi_dmachan *chan) +{ + int reg, retries; + u32 sta; + + if (chan->chan_type == RXCHAN) { + reg = PAS_DMA_RXCHAN_CCMDSTA(chan->chno); + pasemi_write_dma_reg(reg, PAS_DMA_RXCHAN_CCMDSTA_ST); + for (retries = 0; retries < MAX_RETRIES; retries++) { + sta = pasemi_read_dma_reg(reg); + if (!(sta & PAS_DMA_RXCHAN_CCMDSTA_ACT)) { + pasemi_write_dma_reg(reg, 0); + return 1; + } + cond_resched(); + } + } else { + reg = PAS_DMA_TXCHAN_TCMDSTA(chan->chno); + pasemi_write_dma_reg(reg, PAS_DMA_TXCHAN_TCMDSTA_ST); + for (retries = 0; retries < MAX_RETRIES; retries++) { + sta = pasemi_read_dma_reg(reg); + if (!(sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)) { + pasemi_write_dma_reg(reg, 0); + return 1; + } + cond_resched(); + } + } + + return 0; +} +EXPORT_SYMBOL(pasemi_dma_stop_chan); + +/* pasemi_dma_alloc_buf - Allocate a buffer to use for DMA + * @chan: Channel to allocate for + * @size: Size of buffer in bytes + * @handle: DMA handle + * + * Allocate a buffer to be used by the DMA engine for read/write, + * similar to dma_alloc_coherent(). + * + * Returns the virtual address of the buffer, or NULL in case of failure. + */ +void *pasemi_dma_alloc_buf(struct pasemi_dmachan *chan, int size, + dma_addr_t *handle) +{ + return dma_alloc_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL); +} +EXPORT_SYMBOL(pasemi_dma_alloc_buf); + +/* pasemi_dma_free_buf - Free a buffer used for DMA + * @chan: Channel the buffer was allocated for + * @size: Size of buffer in bytes + * @handle: DMA handle + * + * Frees a previously allocated buffer. + */ +void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size, + dma_addr_t *handle) +{ + dma_free_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL); +} +EXPORT_SYMBOL(pasemi_dma_free_buf); + +/* pasemi_dma_alloc_flag - Allocate a flag (event) for channel synchronization + * + * Allocates a flag for use with channel synchronization (event descriptors). + * Returns allocated flag (0-63), < 0 on error. + */ +int pasemi_dma_alloc_flag(void) +{ + int bit; + +retry: + bit = find_first_bit(flags_free, MAX_FLAGS); + if (bit >= MAX_FLAGS) + return -ENOSPC; + if (!test_and_clear_bit(bit, flags_free)) + goto retry; + + return bit; +} +EXPORT_SYMBOL(pasemi_dma_alloc_flag); + + +/* pasemi_dma_free_flag - Deallocates a flag (event) + * @flag: Flag number to deallocate + * + * Frees up a flag so it can be reused for other purposes. + */ +void pasemi_dma_free_flag(int flag) +{ + BUG_ON(test_bit(flag, flags_free)); + BUG_ON(flag >= MAX_FLAGS); + set_bit(flag, flags_free); +} +EXPORT_SYMBOL(pasemi_dma_free_flag); + + +/* pasemi_dma_set_flag - Sets a flag (event) to 1 + * @flag: Flag number to set active + * + * Sets the flag provided to 1. + */ +void pasemi_dma_set_flag(int flag) +{ + BUG_ON(flag >= MAX_FLAGS); + if (flag < 32) + pasemi_write_dma_reg(PAS_DMA_TXF_SFLG0, 1 << flag); + else + pasemi_write_dma_reg(PAS_DMA_TXF_SFLG1, 1 << flag); +} +EXPORT_SYMBOL(pasemi_dma_set_flag); + +/* pasemi_dma_clear_flag - Sets a flag (event) to 0 + * @flag: Flag number to set inactive + * + * Sets the flag provided to 0. + */ +void pasemi_dma_clear_flag(int flag) +{ + BUG_ON(flag >= MAX_FLAGS); + if (flag < 32) + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 1 << flag); + else + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 1 << flag); +} +EXPORT_SYMBOL(pasemi_dma_clear_flag); + +/* pasemi_dma_alloc_fun - Allocate a function engine + * + * Allocates a function engine to use for crypto/checksum offload + * Returns allocated engine (0-8), < 0 on error. + */ +int pasemi_dma_alloc_fun(void) +{ + int bit; + +retry: + bit = find_first_bit(fun_free, MAX_FLAGS); + if (bit >= MAX_FLAGS) + return -ENOSPC; + if (!test_and_clear_bit(bit, fun_free)) + goto retry; + + return bit; +} +EXPORT_SYMBOL(pasemi_dma_alloc_fun); + + +/* pasemi_dma_free_fun - Deallocates a function engine + * @flag: Engine number to deallocate + * + * Frees up a function engine so it can be used for other purposes. + */ +void pasemi_dma_free_fun(int fun) +{ + BUG_ON(test_bit(fun, fun_free)); + BUG_ON(fun >= MAX_FLAGS); + set_bit(fun, fun_free); +} +EXPORT_SYMBOL(pasemi_dma_free_fun); + + +static void *map_onedev(struct pci_dev *p, int index) +{ + struct device_node *dn; + void __iomem *ret; + + dn = pci_device_to_OF_node(p); + if (!dn) + goto fallback; + + ret = of_iomap(dn, index); + if (!ret) + goto fallback; + + return ret; +fallback: + /* This is hardcoded and ugly, but we have some firmware versions + * that don't provide the register space in the device tree. Luckily + * they are at well-known locations so we can just do the math here. + */ + return ioremap(0xe0000000 + (p->devfn << 12), 0x2000); +} + +/* pasemi_dma_init - Initialize the PA Semi DMA library + * + * This function initializes the DMA library. It must be called before + * any other function in the library. + * + * Returns 0 on success, errno on failure. + */ +int pasemi_dma_init(void) +{ + static DEFINE_SPINLOCK(init_lock); + struct pci_dev *iob_pdev; + struct pci_dev *pdev; + struct resource res; + struct device_node *dn; + int i, intf, err = 0; + unsigned long timeout; + u32 tmp; + + if (!machine_is(pasemi)) + return -ENODEV; + + spin_lock(&init_lock); + + /* Make sure we haven't already initialized */ + if (dma_pdev) + goto out; + + iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL); + if (!iob_pdev) { + BUG(); + pr_warn("Can't find I/O Bridge\n"); + err = -ENODEV; + goto out; + } + iob_regs = map_onedev(iob_pdev, 0); + + dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); + if (!dma_pdev) { + BUG(); + pr_warn("Can't find DMA controller\n"); + err = -ENODEV; + goto out; + } + dma_regs = map_onedev(dma_pdev, 0); + base_hw_irq = virq_to_hw(dma_pdev->irq); + + pci_read_config_dword(dma_pdev, PAS_DMA_CAP_TXCH, &tmp); + num_txch = (tmp & PAS_DMA_CAP_TXCH_TCHN_M) >> PAS_DMA_CAP_TXCH_TCHN_S; + + pci_read_config_dword(dma_pdev, PAS_DMA_CAP_RXCH, &tmp); + num_rxch = (tmp & PAS_DMA_CAP_RXCH_RCHN_M) >> PAS_DMA_CAP_RXCH_RCHN_S; + + intf = 0; + for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, NULL); + pdev; + pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, pdev)) + mac_regs[intf++] = map_onedev(pdev, 0); + + pci_dev_put(pdev); + + for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, NULL); + pdev; + pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, pdev)) + mac_regs[intf++] = map_onedev(pdev, 0); + + pci_dev_put(pdev); + + dn = pci_device_to_OF_node(iob_pdev); + if (dn) + err = of_address_to_resource(dn, 1, &res); + if (!dn || err) { + /* Fallback for old firmware */ + res.start = 0xfd800000; + res.end = res.start + 0x1000; + } + dma_status = ioremap_cache(res.start, resource_size(&res)); + pci_dev_put(iob_pdev); + + for (i = 0; i < MAX_TXCH; i++) + __set_bit(i, txch_free); + + for (i = 0; i < MAX_RXCH; i++) + __set_bit(i, rxch_free); + + timeout = jiffies + HZ; + pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0); + while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) { + if (time_after(jiffies, timeout)) { + pr_warn("Warning: Could not disable RX section\n"); + break; + } + } + + timeout = jiffies + HZ; + pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0); + while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) { + if (time_after(jiffies, timeout)) { + pr_warn("Warning: Could not disable TX section\n"); + break; + } + } + + /* setup resource allocations for the different DMA sections */ + tmp = pasemi_read_dma_reg(PAS_DMA_COM_CFG); + pasemi_write_dma_reg(PAS_DMA_COM_CFG, tmp | 0x18000000); + + /* enable tx section */ + pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN); + + /* enable rx section */ + pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN); + + for (i = 0; i < MAX_FLAGS; i++) + __set_bit(i, flags_free); + + for (i = 0; i < MAX_FUN; i++) + __set_bit(i, fun_free); + + /* clear all status flags */ + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff); + pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff); + + pr_info("PA Semi PWRficient DMA library initialized " + "(%d tx, %d rx channels)\n", num_txch, num_rxch); + +out: + spin_unlock(&init_lock); + return err; +} +EXPORT_SYMBOL(pasemi_dma_init); diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c new file mode 100644 index 0000000000..fd130fe7a6 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Author: Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson + * + * Based on drivers/net/fs_enet/mii-bitbang.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DELAY 1 + +static void __iomem *gpio_regs; + +struct gpio_priv { + int mdc_pin; + int mdio_pin; +}; + +#define MDC_PIN(bus) (((struct gpio_priv *)bus->priv)->mdc_pin) +#define MDIO_PIN(bus) (((struct gpio_priv *)bus->priv)->mdio_pin) + +static inline void mdio_lo(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x10, 1 << MDIO_PIN(bus)); +} + +static inline void mdio_hi(struct mii_bus *bus) +{ + out_le32(gpio_regs, 1 << MDIO_PIN(bus)); +} + +static inline void mdc_lo(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x10, 1 << MDC_PIN(bus)); +} + +static inline void mdc_hi(struct mii_bus *bus) +{ + out_le32(gpio_regs, 1 << MDC_PIN(bus)); +} + +static inline void mdio_active(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x20, (1 << MDC_PIN(bus)) | (1 << MDIO_PIN(bus))); +} + +static inline void mdio_tristate(struct mii_bus *bus) +{ + out_le32(gpio_regs+0x30, (1 << MDIO_PIN(bus))); +} + +static inline int mdio_read(struct mii_bus *bus) +{ + return !!(in_le32(gpio_regs+0x40) & (1 << MDIO_PIN(bus))); +} + +static void clock_out(struct mii_bus *bus, int bit) +{ + if (bit) + mdio_hi(bus); + else + mdio_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + mdc_lo(bus); +} + +/* Utility to send the preamble, address, and register (common to read and write). */ +static void bitbang_pre(struct mii_bus *bus, int read, u8 addr, u8 reg) +{ + int i; + + /* CFE uses a really long preamble (40 bits). We'll do the same. */ + mdio_active(bus); + for (i = 0; i < 40; i++) { + clock_out(bus, 1); + } + + /* send the start bit (01) and the read opcode (10) or write (10) */ + clock_out(bus, 0); + clock_out(bus, 1); + + clock_out(bus, read); + clock_out(bus, !read); + + /* send the PHY address */ + for (i = 0; i < 5; i++) { + clock_out(bus, (addr & 0x10) != 0); + addr <<= 1; + } + + /* send the register address */ + for (i = 0; i < 5; i++) { + clock_out(bus, (reg & 0x10) != 0); + reg <<= 1; + } +} + +static int gpio_mdio_read(struct mii_bus *bus, int phy_id, int location) +{ + u16 rdreg; + int ret, i; + u8 addr = phy_id & 0xff; + u8 reg = location & 0xff; + + bitbang_pre(bus, 1, addr, reg); + + /* tri-state our MDIO I/O pin so we can read */ + mdio_tristate(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + mdc_lo(bus); + + /* read 16 bits of register data, MSB first */ + rdreg = 0; + for (i = 0; i < 16; i++) { + mdc_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + mdc_lo(bus); + udelay(DELAY); + rdreg <<= 1; + rdreg |= mdio_read(bus); + } + + mdc_hi(bus); + udelay(DELAY); + mdc_lo(bus); + udelay(DELAY); + + ret = rdreg; + + return ret; +} + +static int gpio_mdio_write(struct mii_bus *bus, int phy_id, int location, u16 val) +{ + int i; + + u8 addr = phy_id & 0xff; + u8 reg = location & 0xff; + u16 value = val & 0xffff; + + bitbang_pre(bus, 0, addr, reg); + + /* send the turnaround (10) */ + mdc_lo(bus); + mdio_hi(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + mdc_lo(bus); + mdio_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + + /* write 16 bits of register data, MSB first */ + for (i = 0; i < 16; i++) { + mdc_lo(bus); + if (value & 0x8000) + mdio_hi(bus); + else + mdio_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + value <<= 1; + } + + /* + * Tri-state the MDIO line. + */ + mdio_tristate(bus); + mdc_lo(bus); + udelay(DELAY); + mdc_hi(bus); + udelay(DELAY); + return 0; +} + +static int gpio_mdio_reset(struct mii_bus *bus) +{ + /*nothing here - dunno how to reset it*/ + return 0; +} + + +static int gpio_mdio_probe(struct platform_device *ofdev) +{ + struct device *dev = &ofdev->dev; + struct device_node *np = ofdev->dev.of_node; + struct mii_bus *new_bus; + struct gpio_priv *priv; + const unsigned int *prop; + int err; + + err = -ENOMEM; + priv = kzalloc(sizeof(struct gpio_priv), GFP_KERNEL); + if (!priv) + goto out; + + new_bus = mdiobus_alloc(); + + if (!new_bus) + goto out_free_priv; + + new_bus->name = "pasemi gpio mdio bus"; + new_bus->read = &gpio_mdio_read; + new_bus->write = &gpio_mdio_write; + new_bus->reset = &gpio_mdio_reset; + + prop = of_get_property(np, "reg", NULL); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *prop); + new_bus->priv = priv; + + prop = of_get_property(np, "mdc-pin", NULL); + priv->mdc_pin = *prop; + + prop = of_get_property(np, "mdio-pin", NULL); + priv->mdio_pin = *prop; + + new_bus->parent = dev; + dev_set_drvdata(dev, new_bus); + + err = of_mdiobus_register(new_bus, np); + + if (err != 0) { + pr_err("%s: Cannot register as MDIO bus, err %d\n", + new_bus->name, err); + goto out_free_irq; + } + + return 0; + +out_free_irq: + kfree(new_bus); +out_free_priv: + kfree(priv); +out: + return err; +} + + +static int gpio_mdio_remove(struct platform_device *dev) +{ + struct mii_bus *bus = dev_get_drvdata(&dev->dev); + + mdiobus_unregister(bus); + + dev_set_drvdata(&dev->dev, NULL); + + kfree(bus->priv); + bus->priv = NULL; + mdiobus_free(bus); + + return 0; +} + +static const struct of_device_id gpio_mdio_match[] = +{ + { + .compatible = "gpio-mdio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, gpio_mdio_match); + +static struct platform_driver gpio_mdio_driver = +{ + .probe = gpio_mdio_probe, + .remove = gpio_mdio_remove, + .driver = { + .name = "gpio-mdio-bitbang", + .of_match_table = gpio_mdio_match, + }, +}; + +static int __init gpio_mdio_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "1682m-gpio"); + if (!np) + np = of_find_compatible_node(NULL, NULL, + "pasemi,pwrficient-gpio"); + if (!np) + return -ENODEV; + gpio_regs = of_iomap(np, 0); + of_node_put(np); + + if (!gpio_regs) + return -ENODEV; + + return platform_driver_register(&gpio_mdio_driver); +} +module_init(gpio_mdio_init); + +static void __exit gpio_mdio_exit(void) +{ + platform_driver_unregister(&gpio_mdio_driver); + if (gpio_regs) + iounmap(gpio_regs); +} +module_exit(gpio_mdio_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Olof Johansson "); +MODULE_DESCRIPTION("Driver for MDIO over GPIO on PA Semi PWRficient-based boards"); diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c new file mode 100644 index 0000000000..6087c70ed2 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Maintained by: Olof Johansson + */ + +#undef DEBUG + +#include +#include +#include + +#include +#include +#include + +#include "pasemi.h" + +struct sleep_mode { + char *name; + void (*entry)(void); +}; + +static struct sleep_mode modes[] = { + { .name = "spin", .entry = &idle_spin }, + { .name = "doze", .entry = &idle_doze }, +}; + +static int current_mode = 0; + +static int pasemi_system_reset_exception(struct pt_regs *regs) +{ + /* If we were woken up from power savings, we need to return + * to the calling function, since nip is not saved across + * all modes. + */ + + if (regs->msr & SRR1_WAKEMASK) + regs_set_return_ip(regs, regs->link); + + switch (regs->msr & SRR1_WAKEMASK) { + case SRR1_WAKEDEC: + set_dec(1); + break; + case SRR1_WAKEEE: + /* + * Handle these when interrupts get re-enabled and we take + * them as regular exceptions. We are in an NMI context + * and can't handle these here. + */ + break; + default: + /* do system reset */ + return 0; + } + + /* Set higher astate since we come out of power savings at 0 */ + restore_astate(hard_smp_processor_id()); + + /* everything handled */ + regs_set_recoverable(regs); + return 1; +} + +static int __init pasemi_idle_init(void) +{ +#ifndef CONFIG_PPC_PASEMI_CPUFREQ + pr_warn("No cpufreq driver, powersavings modes disabled\n"); + current_mode = 0; +#endif + + ppc_md.system_reset_exception = pasemi_system_reset_exception; + ppc_md.power_save = modes[current_mode].entry; + pr_info("Using PA6T idle loop (%s)\n", modes[current_mode].name); + + return 0; +} +machine_late_initcall(pasemi, pasemi_idle_init); + +static int __init idle_param(char *p) +{ + int i; + for (i = 0; i < ARRAY_SIZE(modes); i++) { + if (!strcmp(modes[i].name, p)) { + current_mode = i; + break; + } + } + return 0; +} + +early_param("idle", idle_param); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c new file mode 100644 index 0000000000..375487cba8 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2005-2008, PA Semi, Inc + * + * Maintained by: Olof Johansson + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pasemi.h" + +#define IOBMAP_PAGE_SHIFT 12 +#define IOBMAP_PAGE_SIZE (1 << IOBMAP_PAGE_SHIFT) +#define IOBMAP_PAGE_MASK (IOBMAP_PAGE_SIZE - 1) + +#define IOB_BASE 0xe0000000 +#define IOB_SIZE 0x3000 +/* Configuration registers */ +#define IOBCAP_REG 0x40 +#define IOBCOM_REG 0x100 +/* Enable IOB address translation */ +#define IOBCOM_ATEN 0x00000100 + +/* Address decode configuration register */ +#define IOB_AD_REG 0x14c +/* IOBCOM_AD_REG fields */ +#define IOB_AD_VGPRT 0x00000e00 +#define IOB_AD_VGAEN 0x00000100 +/* Direct mapping settings */ +#define IOB_AD_MPSEL_MASK 0x00000030 +#define IOB_AD_MPSEL_B38 0x00000000 +#define IOB_AD_MPSEL_B40 0x00000010 +#define IOB_AD_MPSEL_B42 0x00000020 +/* Translation window size / enable */ +#define IOB_AD_TRNG_MASK 0x00000003 +#define IOB_AD_TRNG_256M 0x00000000 +#define IOB_AD_TRNG_2G 0x00000001 +#define IOB_AD_TRNG_128G 0x00000003 + +#define IOB_TABLEBASE_REG 0x154 + +/* Base of the 64 4-byte L1 registers */ +#define IOB_XLT_L1_REGBASE 0x2b00 + +/* Register to invalidate TLB entries */ +#define IOB_AT_INVAL_TLB_REG 0x2d00 + +/* The top two bits of the level 1 entry contains valid and type flags */ +#define IOBMAP_L1E_V 0x40000000 +#define IOBMAP_L1E_V_B 0x80000000 + +/* For big page entries, the bottom two bits contains flags */ +#define IOBMAP_L1E_BIG_CACHED 0x00000002 +#define IOBMAP_L1E_BIG_PRIORITY 0x00000001 + +/* For regular level 2 entries, top 2 bits contain valid and cache flags */ +#define IOBMAP_L2E_V 0x80000000 +#define IOBMAP_L2E_V_CACHED 0xc0000000 + +static void __iomem *iob; +static u32 iob_l1_emptyval; +static u32 iob_l2_emptyval; +static u32 *iob_l2_base; + +static struct iommu_table iommu_table_iobmap; +static int iommu_table_iobmap_inited; + +static int iobmap_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + unsigned long attrs) +{ + u32 *ip; + u32 rpn; + unsigned long bus_addr; + + pr_debug("iobmap: build at: %lx, %lx, addr: %lx\n", index, npages, uaddr); + + bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT; + + ip = ((u32 *)tbl->it_base) + index; + + while (npages--) { + rpn = __pa(uaddr) >> IOBMAP_PAGE_SHIFT; + + *(ip++) = IOBMAP_L2E_V | rpn; + /* invalidate tlb, can be optimized more */ + out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14); + + uaddr += IOBMAP_PAGE_SIZE; + bus_addr += IOBMAP_PAGE_SIZE; + } + return 0; +} + + +static void iobmap_free(struct iommu_table *tbl, long index, + long npages) +{ + u32 *ip; + unsigned long bus_addr; + + pr_debug("iobmap: free at: %lx, %lx\n", index, npages); + + bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT; + + ip = ((u32 *)tbl->it_base) + index; + + while (npages--) { + *(ip++) = iob_l2_emptyval; + /* invalidate tlb, can be optimized more */ + out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14); + bus_addr += IOBMAP_PAGE_SIZE; + } +} + +static struct iommu_table_ops iommu_table_iobmap_ops = { + .set = iobmap_build, + .clear = iobmap_free +}; + +static void iommu_table_iobmap_setup(void) +{ + pr_debug(" -> %s\n", __func__); + iommu_table_iobmap.it_busno = 0; + iommu_table_iobmap.it_offset = 0; + iommu_table_iobmap.it_page_shift = IOBMAP_PAGE_SHIFT; + + /* it_size is in number of entries */ + iommu_table_iobmap.it_size = + 0x80000000 >> iommu_table_iobmap.it_page_shift; + + /* Initialize the common IOMMU code */ + iommu_table_iobmap.it_base = (unsigned long)iob_l2_base; + iommu_table_iobmap.it_index = 0; + /* XXXOJN tune this to avoid IOB cache invals. + * Should probably be 8 (64 bytes) + */ + iommu_table_iobmap.it_blocksize = 4; + iommu_table_iobmap.it_ops = &iommu_table_iobmap_ops; + if (!iommu_init_table(&iommu_table_iobmap, 0, 0, 0)) + panic("Failed to initialize iommu table"); + + pr_debug(" <- %s\n", __func__); +} + + + +static void pci_dma_bus_setup_pasemi(struct pci_bus *bus) +{ + pr_debug("pci_dma_bus_setup, bus %p, bus->self %p\n", bus, bus->self); + + if (!iommu_table_iobmap_inited) { + iommu_table_iobmap_inited = 1; + iommu_table_iobmap_setup(); + } +} + + +static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) +{ + pr_debug("pci_dma_dev_setup, dev %p (%s)\n", dev, pci_name(dev)); + +#if !defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE) + /* For non-LPAR environment, don't translate anything for the DMA + * engine. The exception to this is if the user has enabled + * CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE at build time. + */ + if (dev->vendor == 0x1959 && dev->device == 0xa007 && + !firmware_has_feature(FW_FEATURE_LPAR)) { + dev->dev.dma_ops = NULL; + /* + * Set the coherent DMA mask to prevent the iommu + * being used unnecessarily + */ + dev->dev.coherent_dma_mask = DMA_BIT_MASK(44); + return; + } +#endif + + set_iommu_table_base(&dev->dev, &iommu_table_iobmap); +} + +static int __init iob_init(struct device_node *dn) +{ + unsigned long tmp; + u32 regword; + int i; + + pr_debug(" -> %s\n", __func__); + + /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ + iob_l2_base = memblock_alloc_try_nid_raw(1UL << 21, 1UL << 21, + MEMBLOCK_LOW_LIMIT, 0x80000000, + NUMA_NO_NODE); + if (!iob_l2_base) + panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%x\n", + __func__, 1UL << 21, 1UL << 21, 0x80000000); + + pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); + + /* Allocate a spare page to map all invalid IOTLB pages. */ + tmp = memblock_phys_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); + if (!tmp) + panic("IOBMAP: Cannot allocate spare page!"); + /* Empty l1 is marked invalid */ + iob_l1_emptyval = 0; + /* Empty l2 is mapped to dummy page */ + iob_l2_emptyval = IOBMAP_L2E_V | (tmp >> IOBMAP_PAGE_SHIFT); + + iob = ioremap(IOB_BASE, IOB_SIZE); + if (!iob) + panic("IOBMAP: Cannot map registers!"); + + /* setup direct mapping of the L1 entries */ + for (i = 0; i < 64; i++) { + /* Each L1 covers 32MB, i.e. 8K entries = 32K of ram */ + regword = IOBMAP_L1E_V | (__pa(iob_l2_base + i*0x2000) >> 12); + out_le32(iob+IOB_XLT_L1_REGBASE+i*4, regword); + } + + /* set 2GB translation window, based at 0 */ + regword = in_le32(iob+IOB_AD_REG); + regword &= ~IOB_AD_TRNG_MASK; + regword |= IOB_AD_TRNG_2G; + out_le32(iob+IOB_AD_REG, regword); + + /* Enable translation */ + regword = in_le32(iob+IOBCOM_REG); + regword |= IOBCOM_ATEN; + out_le32(iob+IOBCOM_REG, regword); + + pr_debug(" <- %s\n", __func__); + + return 0; +} + + +/* These are called very early. */ +void __init iommu_init_early_pasemi(void) +{ + int iommu_off; + +#ifndef CONFIG_PPC_PASEMI_IOMMU + iommu_off = 1; +#else + iommu_off = of_chosen && + of_property_read_bool(of_chosen, "linux,iommu-off"); +#endif + if (iommu_off) + return; + + iob_init(NULL); + + pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi; + pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi; + set_pci_dma_ops(&dma_iommu_ops); +} diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c new file mode 100644 index 0000000000..9e9a7e4628 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2007 PA Semi, Inc + * + * Parts based on arch/powerpc/sysdev/fsl_soc.c: + * + * 2006 (c) MontaVista Software, Inc. + */ + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_I2C_BOARDINFO +/* The below is from fsl_soc.c. It's copied because since there are no + * official bus bindings at this time it doesn't make sense to share across + * the platforms, even though they happen to be common. + */ +struct i2c_driver_device { + char *of_device; + char *i2c_type; +}; + +static struct i2c_driver_device i2c_devices[] __initdata = { + {"dallas,ds1338", "ds1338"}, +}; + +static int __init find_i2c_driver(struct device_node *node, + struct i2c_board_info *info) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) { + if (!of_device_is_compatible(node, i2c_devices[i].of_device)) + continue; + if (strscpy(info->type, i2c_devices[i].i2c_type, I2C_NAME_SIZE) < 0) + return -ENOMEM; + return 0; + } + return -ENODEV; +} + +static int __init pasemi_register_i2c_devices(void) +{ + struct pci_dev *pdev; + struct device_node *adap_node; + struct device_node *node; + + pdev = NULL; + while ((pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa003, pdev))) { + adap_node = pci_device_to_OF_node(pdev); + + if (!adap_node) + continue; + + for_each_child_of_node(adap_node, node) { + struct i2c_board_info info = {}; + const u32 *addr; + int len; + + addr = of_get_property(node, "reg", &len); + if (!addr || len < sizeof(int) || + *addr > (1 << 10) - 1) { + pr_warn("pasemi_register_i2c_devices: invalid i2c device entry\n"); + continue; + } + + info.irq = irq_of_parse_and_map(node, 0); + if (!info.irq) + info.irq = -1; + + if (find_i2c_driver(node, &info) < 0) + continue; + + info.addr = *addr; + + i2c_register_board_info(PCI_FUNC(pdev->devfn), &info, + 1); + } + } + return 0; +} +device_initcall(pasemi_register_i2c_devices); +#endif diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c new file mode 100644 index 0000000000..166c97fff1 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2007, Olof Johansson, PA Semi + * + * Based on arch/powerpc/sysdev/mpic_u3msi.c: + * + * Copyright 2006, Segher Boessenkool, IBM Corporation. + * Copyright 2006-2007, Michael Ellerman, IBM Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Allocate 16 interrupts per device, to give an alignment of 16, + * since that's the size of the grouping w.r.t. affinity. If someone + * needs more than 32 MSI's down the road we'll have to rethink this, + * but it should be OK for now. + */ +#define ALLOC_CHUNK 16 + +#define PASEMI_MSI_ADDR 0xfc080000 + +/* A bit ugly, can we get this from the pci_dev somehow? */ +static struct mpic *msi_mpic; + + +static void mpic_pasemi_msi_mask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); + pci_msi_mask_irq(data); + mpic_mask_irq(data); +} + +static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) +{ + pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); + mpic_unmask_irq(data); + pci_msi_unmask_irq(data); +} + +static struct irq_chip mpic_pasemi_msi_chip = { + .irq_shutdown = mpic_pasemi_msi_mask_irq, + .irq_mask = mpic_pasemi_msi_mask_irq, + .irq_unmask = mpic_pasemi_msi_unmask_irq, + .irq_eoi = mpic_end_irq, + .irq_set_type = mpic_set_irq_type, + .irq_set_affinity = mpic_set_affinity, + .name = "PASEMI-MSI", +}; + +static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct msi_desc *entry; + irq_hw_number_t hwirq; + + pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); + + msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) { + hwirq = virq_to_hw(entry->irq); + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + entry->irq = 0; + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); + } +} + +static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + unsigned int virq; + struct msi_desc *entry; + struct msi_msg msg; + int hwirq; + + if (type == PCI_CAP_ID_MSIX) + pr_debug("pasemi_msi: MSI-X untested, trying anyway\n"); + pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n", + pdev, nvec, type); + + msg.address_hi = 0; + msg.address_lo = PASEMI_MSI_ADDR; + + msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) { + /* Allocate 16 interrupts for now, since that's the grouping for + * affinity. This can be changed later if it turns out 32 is too + * few MSIs for someone, but restrictions will apply to how the + * sources can be changed independently. + */ + hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap, + ALLOC_CHUNK); + if (hwirq < 0) { + pr_debug("pasemi_msi: failed allocating hwirq\n"); + return hwirq; + } + + virq = irq_create_mapping(msi_mpic->irqhost, hwirq); + if (!virq) { + pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", + hwirq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, + ALLOC_CHUNK); + return -ENOSPC; + } + + /* Vector on MSI is really an offset, the hardware adds + * it to the value written at the magic address. So set + * it to 0 to remain sane. + */ + mpic_set_vector(virq, 0); + + irq_set_msi_desc(virq, entry); + irq_set_chip(virq, &mpic_pasemi_msi_chip); + irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING); + + pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \ + "addr 0x%x\n", virq, hwirq, msg.address_lo); + + /* Likewise, the device writes [0...511] into the target + * register to generate MSI [512...1023] + */ + msg.data = hwirq-0x200; + pci_write_msi_msg(virq, &msg); + } + + return 0; +} + +int __init mpic_pasemi_msi_init(struct mpic *mpic) +{ + int rc; + struct pci_controller *phb; + struct device_node *of_node; + + of_node = irq_domain_get_of_node(mpic->irqhost); + if (!of_node || + !of_device_is_compatible(of_node, + "pasemi,pwrficient-openpic")) + return -ENODEV; + + rc = mpic_msi_init_allocator(mpic); + if (rc) { + pr_debug("pasemi_msi: Error allocating bitmap!\n"); + return rc; + } + + pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n"); + + msi_mpic = mpic; + list_for_each_entry(phb, &hose_list, list_node) { + WARN_ON(phb->controller_ops.setup_msi_irqs); + phb->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs; + phb->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs; + } + + return 0; +} diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h new file mode 100644 index 0000000000..018c30665e --- /dev/null +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PASEMI_PASEMI_H +#define _PASEMI_PASEMI_H + +extern time64_t pas_get_boot_time(void); +extern void pas_pci_init(void); +struct pci_dev; +extern void pas_pci_irq_fixup(struct pci_dev *dev); +extern void pas_pci_dma_dev_setup(struct pci_dev *dev); + +void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); + +extern void __init pasemi_map_registers(void); + +/* Power savings modes, implemented in asm */ +extern void idle_spin(void); +extern void idle_doze(void); + +/* Restore astate to last set */ +#ifdef CONFIG_PPC_PASEMI_CPUFREQ +extern int check_astate(void); +extern void restore_astate(int cpu); +#else +static inline int check_astate(void) +{ + /* Always return >0 so we never power save */ + return 1; +} +static inline void restore_astate(int cpu) +{ +} +#endif + +extern struct pci_controller_ops pasemi_pci_controller_ops; + +#endif /* _PASEMI_PASEMI_H */ diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c new file mode 100644 index 0000000000..f27d314147 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Authors: Kip Walker, PA Semi + * Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson + * + * Based on arch/powerpc/platforms/maple/pci.c + */ + + +#include +#include +#include + +#include +#include +#include + +#include + +#include "pasemi.h" + +#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off)) + +static inline int pa_pxp_offset_valid(u8 bus, u8 devfn, int offset) +{ + /* Device 0 Function 0 is special: It's config space spans function 1 as + * well, so allow larger offset. It's really a two-function device but the + * second function does not probe. + */ + if (bus == 0 && devfn == 0) + return offset < 8192; + else + return offset < 4096; +} + +static void volatile __iomem *pa_pxp_cfg_addr(struct pci_controller *hose, + u8 bus, u8 devfn, int offset) +{ + return hose->cfg_data + PA_PXP_CFA(bus, devfn, offset); +} + +static inline int is_root_port(int busno, int devfn) +{ + return ((busno == 0) && (PCI_FUNC(devfn) < 4) && + ((PCI_SLOT(devfn) == 16) || (PCI_SLOT(devfn) == 17))); +} + +static inline int is_5945_reg(int reg) +{ + return (((reg >= 0x18) && (reg < 0x34)) || + ((reg >= 0x158) && (reg < 0x178))); +} + +static int workaround_5945(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + void volatile __iomem *addr, *dummy; + int byte; + u32 tmp; + + if (!is_root_port(bus->number, devfn) || !is_5945_reg(offset)) + return 0; + + hose = pci_bus_to_host(bus); + + addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset & ~0x3); + byte = offset & 0x3; + + /* Workaround bug 5945: write 0 to a dummy register before reading, + * and write back what we read. We must read/write the full 32-bit + * contents so we need to shift and mask by hand. + */ + dummy = pa_pxp_cfg_addr(hose, bus->number, devfn, 0x10); + out_le32(dummy, 0); + tmp = in_le32(addr); + out_le32(addr, tmp); + + switch (len) { + case 1: + *val = (tmp >> (8*byte)) & 0xff; + break; + case 2: + if (byte == 0) + *val = tmp & 0xffff; + else + *val = (tmp >> 16) & 0xffff; + break; + default: + *val = tmp; + break; + } + + return 1; +} + +#ifdef CONFIG_PPC_PASEMI_NEMO +#define PXP_ERR_CFG_REG 0x4 +#define PXP_IGNORE_PCIE_ERRORS 0x800 +#define SB600_BUS 5 + +static void sb600_set_flag(int bus) +{ + static void __iomem *iob_mapbase = NULL; + struct resource res; + struct device_node *dn; + int err; + + if (iob_mapbase == NULL) { + dn = of_find_compatible_node(NULL, "isa", "pasemi,1682m-iob"); + if (!dn) { + pr_crit("NEMO SB600 missing iob node\n"); + return; + } + + err = of_address_to_resource(dn, 0, &res); + of_node_put(dn); + + if (err) { + pr_crit("NEMO SB600 missing resource\n"); + return; + } + + pr_info("NEMO SB600 IOB base %08llx\n",res.start); + + iob_mapbase = ioremap(res.start + 0x100, 0x94); + } + + if (iob_mapbase != NULL) { + if (bus == SB600_BUS) { + /* + * This is the SB600's bus, tell the PCI-e root port + * to allow non-zero devices to enumerate. + */ + out_le32(iob_mapbase + PXP_ERR_CFG_REG, in_le32(iob_mapbase + PXP_ERR_CFG_REG) | PXP_IGNORE_PCIE_ERRORS); + } else { + /* + * Only scan device 0 on other busses + */ + out_le32(iob_mapbase + PXP_ERR_CFG_REG, in_le32(iob_mapbase + PXP_ERR_CFG_REG) & ~PXP_IGNORE_PCIE_ERRORS); + } + } +} + +#else + +static void sb600_set_flag(int bus) +{ +} +#endif + +static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + void volatile __iomem *addr; + + hose = pci_bus_to_host(bus); + if (!hose) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!pa_pxp_offset_valid(bus->number, devfn, offset)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + if (workaround_5945(bus, devfn, offset, len, val)) + return PCIBIOS_SUCCESSFUL; + + addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset); + + sb600_set_flag(bus->number); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = in_le16(addr); + break; + default: + *val = in_le32(addr); + break; + } + + return PCIBIOS_SUCCESSFUL; +} + +static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + void volatile __iomem *addr; + + hose = pci_bus_to_host(bus); + if (!hose) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!pa_pxp_offset_valid(bus->number, devfn, offset)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset); + + sb600_set_flag(bus->number); + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + out_le16(addr, val); + break; + default: + out_le32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops pa_pxp_ops = { + .read = pa_pxp_read_config, + .write = pa_pxp_write_config, +}; + +static void __init setup_pa_pxp(struct pci_controller *hose) +{ + hose->ops = &pa_pxp_ops; + hose->cfg_data = ioremap(0xe0000000, 0x10000000); +} + +static int __init pas_add_bridge(struct device_node *dev) +{ + struct pci_controller *hose; + + pr_debug("Adding PCI host bridge %pOF\n", dev); + + hose = pcibios_alloc_controller(dev); + if (!hose) + return -ENOMEM; + + hose->first_busno = 0; + hose->last_busno = 0xff; + hose->controller_ops = pasemi_pci_controller_ops; + + setup_pa_pxp(hose); + + pr_info("Found PA-PXP PCI host bridge.\n"); + + /* Interpret the "ranges" property */ + pci_process_bridge_OF_ranges(hose, dev, 1); + + /* + * Scan for an isa bridge. This is needed to find the SB600 on the nemo + * and does nothing on machines without one. + */ + isa_bridge_find_early(hose); + + return 0; +} + +void __init pas_pci_init(void) +{ + struct device_node *np; + int res; + + pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS); + + np = of_find_compatible_node(of_root, NULL, "pasemi,rootbus"); + if (np) { + res = pas_add_bridge(np); + of_node_put(np); + } +} + +void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) +{ + struct pci_controller *hose; + + hose = pci_bus_to_host(dev->bus); + + return (void __iomem *)pa_pxp_cfg_addr(hose, dev->bus->number, dev->devfn, offset); +} + +struct pci_controller_ops pasemi_pci_controller_ops; diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S new file mode 100644 index 0000000000..d0215d5329 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/powersave.S @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Maintained by: Olof Johansson + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Power savings opcodes since not all binutils have them at this time */ +#define DOZE .long 0x4c000324 +#define NAP .long 0x4c000364 +#define SLEEP .long 0x4c0003a4 +#define RVW .long 0x4c0003e4 + +/* Common sequence to do before going to any of the + * powersavings modes. + */ + +#define PRE_SLEEP_SEQUENCE \ + std r3,8(r1); \ + ptesync ; \ + ld r3,8(r1); \ +1: cmpd r3,r3; \ + bne 1b + +_doze: + PRE_SLEEP_SEQUENCE + DOZE + b . + + +_GLOBAL(idle_spin) + blr + +_GLOBAL(idle_doze) + LOAD_REG_ADDR(r3, _doze) + b sleep_common + +/* Add more modes here later */ + +sleep_common: + mflr r0 + std r0, 16(r1) + stdu r1,-64(r1) +#ifdef CONFIG_PPC_PASEMI_CPUFREQ + std r3, 48(r1) + + /* Only do power savings when in astate 0 */ + bl check_astate + cmpwi r3,0 + bne 1f + + ld r3, 48(r1) +#endif + LOAD_REG_IMMEDIATE(r6,MSR_DR|MSR_IR|MSR_ME|MSR_EE) + mfmsr r4 + andc r5,r4,r6 + mtmsrd r5,0 + + mtctr r3 + bctrl + + mtmsrd r4,0 + +1: addi r1,r1,64 + ld r0,16(r1) + mtlr r0 + blr + diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c new file mode 100644 index 0000000000..ef985ba2bf --- /dev/null +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2006-2007 PA Semi, Inc + * + * Authors: Kip Walker, PA Semi + * Olof Johansson, PA Semi + * + * Maintained by: Olof Johansson + * + * Based on arch/powerpc/platforms/maple/setup.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "pasemi.h" + +/* SDC reset register, must be pre-mapped at reset time */ +static void __iomem *reset_reg; + +/* Various error status registers, must be pre-mapped at MCE time */ + +#define MAX_MCE_REGS 32 +struct mce_regs { + char *name; + void __iomem *addr; +}; + +static struct mce_regs mce_regs[MAX_MCE_REGS]; +static int num_mce_regs; +static int nmi_virq = 0; + + +static void __noreturn pas_restart(char *cmd) +{ + /* Need to put others cpu in hold loop so they're not sleeping */ + smp_send_stop(); + udelay(10000); + printk("Restarting...\n"); + while (1) + out_le32(reset_reg, 0x6000000); +} + +#ifdef CONFIG_PPC_PASEMI_NEMO +void pas_shutdown(void) +{ + /* Set the PLD bit that makes the SB600 think the power button is being pressed */ + void __iomem *pld_map = ioremap(0xf5000000,4096); + while (1) + out_8(pld_map+7,0x01); +} + +/* RTC platform device structure as is not in device tree */ +static struct resource rtc_resource[] = {{ + .name = "rtc", + .start = 0x70, + .end = 0x71, + .flags = IORESOURCE_IO, +}, { + .name = "rtc", + .start = 8, + .end = 8, + .flags = IORESOURCE_IRQ, +}}; + +static inline void nemo_init_rtc(void) +{ + platform_device_register_simple("rtc_cmos", -1, rtc_resource, 2); +} + +#else + +static inline void nemo_init_rtc(void) +{ +} +#endif + +#ifdef CONFIG_SMP +static arch_spinlock_t timebase_lock; +static unsigned long timebase; + +static void pas_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + hard_irq_disable(); + arch_spin_lock(&timebase_lock); + mtspr(SPRN_TBCTL, TBCTL_FREEZE); + isync(); + timebase = get_tb(); + arch_spin_unlock(&timebase_lock); + + while (timebase) + barrier(); + mtspr(SPRN_TBCTL, TBCTL_RESTART); + local_irq_restore(flags); +} + +static void pas_take_timebase(void) +{ + while (!timebase) + smp_rmb(); + + arch_spin_lock(&timebase_lock); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + arch_spin_unlock(&timebase_lock); +} + +static struct smp_ops_t pas_smp_ops = { + .probe = smp_mpic_probe, + .message_pass = smp_mpic_message_pass, + .kick_cpu = smp_generic_kick_cpu, + .setup_cpu = smp_mpic_setup_cpu, + .give_timebase = pas_give_timebase, + .take_timebase = pas_take_timebase, +}; +#endif /* CONFIG_SMP */ + +static void __init pas_setup_arch(void) +{ +#ifdef CONFIG_SMP + /* Setup SMP callback */ + smp_ops = &pas_smp_ops; +#endif + + /* Remap SDC register for doing reset */ + /* XXXOJN This should maybe come out of the device tree */ + reset_reg = ioremap(0xfc101100, 4); +} + +static int __init pas_setup_mce_regs(void) +{ + struct pci_dev *dev; + int reg; + + /* Remap various SoC status registers for use by the MCE handler */ + + reg = 0; + + dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, NULL); + while (dev && reg < MAX_MCE_REGS) { + mce_regs[reg].name = kasprintf(GFP_KERNEL, + "mc%d_mcdebug_errsta", reg); + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x730); + dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, dev); + reg++; + } + + dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL); + if (dev && reg+4 < MAX_MCE_REGS) { + mce_regs[reg].name = "iobdbg_IntStatus1"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x438); + reg++; + mce_regs[reg].name = "iobdbg_IOCTbusIntDbgReg"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x454); + reg++; + mce_regs[reg].name = "iobiom_IntStatus"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc10); + reg++; + mce_regs[reg].name = "iobiom_IntDbgReg"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc1c); + reg++; + } + + dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa009, NULL); + if (dev && reg+2 < MAX_MCE_REGS) { + mce_regs[reg].name = "l2csts_IntStatus"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x200); + reg++; + mce_regs[reg].name = "l2csts_Cnt"; + mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x214); + reg++; + } + + num_mce_regs = reg; + + return 0; +} +machine_device_initcall(pasemi, pas_setup_mce_regs); + +#ifdef CONFIG_PPC_PASEMI_NEMO +static void sb600_8259_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +static void __init nemo_init_IRQ(struct mpic *mpic) +{ + struct device_node *np; + int gpio_virq; + /* Connect the SB600's legacy i8259 controller */ + np = of_find_node_by_path("/pxp@0,e0000000"); + i8259_init(np, 0); + of_node_put(np); + + gpio_virq = irq_create_mapping(NULL, 3); + irq_set_irq_type(gpio_virq, IRQ_TYPE_LEVEL_HIGH); + irq_set_chained_handler(gpio_virq, sb600_8259_cascade); + mpic_unmask_irq(irq_get_irq_data(gpio_virq)); + + irq_set_default_host(mpic->irqhost); +} + +#else + +static inline void nemo_init_IRQ(struct mpic *mpic) +{ +} +#endif + +static __init void pas_init_IRQ(void) +{ + struct device_node *np; + struct device_node *root, *mpic_node; + unsigned long openpic_addr; + const unsigned int *opprop; + int naddr, opplen; + int mpic_flags; + const unsigned int *nmiprop; + struct mpic *mpic; + + mpic_node = NULL; + + for_each_node_by_type(np, "interrupt-controller") + if (of_device_is_compatible(np, "open-pic")) { + mpic_node = np; + break; + } + if (!mpic_node) + for_each_node_by_type(np, "open-pic") { + mpic_node = np; + break; + } + if (!mpic_node) { + pr_err("Failed to locate the MPIC interrupt controller\n"); + return; + } + + /* Find address list in /platform-open-pic */ + root = of_find_node_by_path("/"); + naddr = of_n_addr_cells(root); + opprop = of_get_property(root, "platform-open-pic", &opplen); + if (!opprop) { + pr_err("No platform-open-pic property.\n"); + of_node_put(root); + return; + } + openpic_addr = of_read_number(opprop, naddr); + pr_debug("OpenPIC addr: %lx\n", openpic_addr); + + mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET; + + nmiprop = of_get_property(mpic_node, "nmi-source", NULL); + if (nmiprop) + mpic_flags |= MPIC_ENABLE_MCK; + + mpic = mpic_alloc(mpic_node, openpic_addr, + mpic_flags, 0, 0, "PASEMI-OPIC"); + BUG_ON(!mpic); + + mpic_assign_isu(mpic, 0, mpic->paddr + 0x10000); + mpic_init(mpic); + /* The NMI/MCK source needs to be prio 15 */ + if (nmiprop) { + nmi_virq = irq_create_mapping(NULL, *nmiprop); + mpic_irq_set_priority(nmi_virq, 15); + irq_set_irq_type(nmi_virq, IRQ_TYPE_EDGE_RISING); + mpic_unmask_irq(irq_get_irq_data(nmi_virq)); + } + + nemo_init_IRQ(mpic); + + of_node_put(mpic_node); + of_node_put(root); +} + +static void __init pas_progress(char *s, unsigned short hex) +{ + printk("[%04x] : %s\n", hex, s ? s : ""); +} + + +static int pas_machine_check_handler(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long srr0, srr1, dsisr; + int dump_slb = 0; + int i; + + srr0 = regs->nip; + srr1 = regs->msr; + + if (nmi_virq && mpic_get_mcirq() == nmi_virq) { + pr_err("NMI delivered\n"); + debugger(regs); + mpic_end_irq(irq_get_irq_data(nmi_virq)); + goto out; + } + + dsisr = mfspr(SPRN_DSISR); + pr_err("Machine Check on CPU %d\n", cpu); + pr_err("SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1); + pr_err("DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar); + pr_err("BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER), + mfspr(SPRN_PA6T_MER)); + pr_err("IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER), + mfspr(SPRN_PA6T_DER)); + pr_err("Cause:\n"); + + if (srr1 & 0x200000) + pr_err("Signalled by SDC\n"); + + if (srr1 & 0x100000) { + pr_err("Load/Store detected error:\n"); + if (dsisr & 0x8000) + pr_err("D-cache ECC double-bit error or bus error\n"); + if (dsisr & 0x4000) + pr_err("LSU snoop response error\n"); + if (dsisr & 0x2000) { + pr_err("MMU SLB multi-hit or invalid B field\n"); + dump_slb = 1; + } + if (dsisr & 0x1000) + pr_err("Recoverable Duptags\n"); + if (dsisr & 0x800) + pr_err("Recoverable D-cache parity error count overflow\n"); + if (dsisr & 0x400) + pr_err("TLB parity error count overflow\n"); + } + + if (srr1 & 0x80000) + pr_err("Bus Error\n"); + + if (srr1 & 0x40000) { + pr_err("I-side SLB multiple hit\n"); + dump_slb = 1; + } + + if (srr1 & 0x20000) + pr_err("I-cache parity error hit\n"); + + if (num_mce_regs == 0) + pr_err("No MCE registers mapped yet, can't dump\n"); + else + pr_err("SoC debug registers:\n"); + + for (i = 0; i < num_mce_regs; i++) + pr_err("%s: 0x%08x\n", mce_regs[i].name, + in_le32(mce_regs[i].addr)); + + if (dump_slb) { + unsigned long e, v; + int i; + + pr_err("slb contents:\n"); + for (i = 0; i < mmu_slb_size; i++) { + asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i)); + asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i)); + pr_err("%02d %016lx %016lx\n", i, e, v); + } + } + +out: + /* SRR1[62] is from MSR[62] if recoverable, so pass that back */ + return !!(srr1 & 0x2); +} + +static const struct of_device_id pasemi_bus_ids[] = { + /* Unfortunately needed for legacy firmwares */ + { .type = "localbus", }, + { .type = "sdc", }, + /* These are the proper entries, which newer firmware uses */ + { .compatible = "pasemi,localbus", }, + { .compatible = "pasemi,sdc", }, + {}, +}; + +static int __init pasemi_publish_devices(void) +{ + /* Publish OF platform devices for SDC and other non-PCI devices */ + of_platform_bus_probe(NULL, pasemi_bus_ids, NULL); + + nemo_init_rtc(); + + return 0; +} +machine_device_initcall(pasemi, pasemi_publish_devices); + + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init pas_probe(void) +{ + if (!of_machine_is_compatible("PA6T-1682M") && + !of_machine_is_compatible("pasemi,pwrficient")) + return 0; + +#ifdef CONFIG_PPC_PASEMI_NEMO + /* + * Check for the Nemo motherboard here, if we are running on one + * change the machine definition to fit + */ + if (of_machine_is_compatible("pasemi,nemo")) { + pm_power_off = pas_shutdown; + ppc_md.name = "A-EON Amigaone X1000"; + } +#endif + + iommu_init_early_pasemi(); + + return 1; +} + +define_machine(pasemi) { + .name = "PA Semi PWRficient", + .probe = pas_probe, + .setup_arch = pas_setup_arch, + .discover_phbs = pas_pci_init, + .init_IRQ = pas_init_IRQ, + .get_irq = mpic_get_irq, + .restart = pas_restart, + .get_boot_time = pas_get_boot_time, + .progress = pas_progress, + .machine_check_exception = pas_machine_check_handler, +}; diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c new file mode 100644 index 0000000000..70ac6db027 --- /dev/null +++ b/arch/powerpc/platforms/pasemi/time.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2006 PA Semi, Inc + * + * Maintained by: Olof Johansson + */ + +#include + +#include + +#include "pasemi.h" + +time64_t __init pas_get_boot_time(void) +{ + /* Let's just return a fake date right now */ + return mktime64(2006, 1, 1, 12, 0, 0); +} diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig new file mode 100644 index 0000000000..130707ec9f --- /dev/null +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_PMAC + bool "Apple PowerMac based machines" + depends on PPC_BOOK3S && CPU_BIG_ENDIAN + select MPIC + select FORCE_PCI + select PPC_INDIRECT_PCI if PPC32 + select PPC_MPC106 if PPC32 + select PPC_64S_HASH_MMU if PPC64 + select PPC_HASH_MMU_NATIVE + select ZONE_DMA if PPC32 + default y + +config PPC_PMAC64 + bool + depends on PPC_PMAC && PPC64 + select MPIC + select U3_DART + select MPIC_U3_HT_IRQS + select GENERIC_TBSYNC + select PPC_970_NAP + default y + +config PPC_PMAC32_PSURGE + bool "Support for powersurge upgrade cards" if EXPERT + depends on SMP && PPC32 && PPC_PMAC + select PPC_SMP_MUXED_IPI + select IRQ_DOMAIN_NOMAP + default y + help + The powersurge cpu boards can be used in the generation + of powermacs that have a socket for an upgradeable cpu card, + including the 7500, 8500, 9500, 9600. Support exists for + both dual and quad socket upgrade cards. diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile new file mode 100644 index 0000000000..cf85f0662d --- /dev/null +++ b/arch/powerpc/platforms/powermac/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS_bootx_init.o += -fPIC +CFLAGS_bootx_init.o += -fno-stack-protector + +KASAN_SANITIZE_bootx_init.o := n + +ifdef CONFIG_KASAN +CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING +endif + +ifdef CONFIG_FUNCTION_TRACER +# Do not trace early boot code +CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE) +endif + +obj-y += pic.o setup.o time.o feature.o pci.o \ + sleep.o low_i2c.o cache.o pfunc_core.o \ + pfunc_base.o udbg_scc.o udbg_adb.o +obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o +# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really +# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really +# CONFIG_NVRAM=y +obj-$(CONFIG_NVRAM:m=y) += nvram.o +obj-$(CONFIG_PPC32) += bootx_init.o +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c new file mode 100644 index 0000000000..aeb79a8b3e --- /dev/null +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Miscellaneous procedures for dealing with the PowerMac hardware. + * Contains support for the backlight. + * + * Copyright (C) 2000 Benjamin Herrenschmidt + * Copyright (C) 2006 Michael Hanselmann + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define OLD_BACKLIGHT_MAX 15 + +static void pmac_backlight_key_worker(struct work_struct *work); +static void pmac_backlight_set_legacy_worker(struct work_struct *work); + +static DECLARE_WORK(pmac_backlight_key_work, pmac_backlight_key_worker); +static DECLARE_WORK(pmac_backlight_set_legacy_work, pmac_backlight_set_legacy_worker); + +/* Although these variables are used in interrupt context, it makes no sense to + * protect them. No user is able to produce enough key events per second and + * notice the errors that might happen. + */ +static int pmac_backlight_key_queued; +static int pmac_backlight_set_legacy_queued; + +/* The via-pmu code allows the backlight to be grabbed, in which case the + * in-kernel control of the brightness needs to be disabled. This should + * only be used by really old PowerBooks. + */ +static atomic_t kernel_backlight_disabled = ATOMIC_INIT(0); + +/* Protect the pmac_backlight variable below. + You should hold this lock when using the pmac_backlight pointer to + prevent its potential removal. */ +DEFINE_MUTEX(pmac_backlight_mutex); + +/* Main backlight storage + * + * Backlight drivers in this variable are required to have the "ops" + * attribute set and to have an update_status function. + * + * We can only store one backlight here, but since Apple laptops have only one + * internal display, it doesn't matter. Other backlight drivers can be used + * independently. + * + */ +struct backlight_device *pmac_backlight; + +int pmac_has_backlight_type(const char *type) +{ + struct device_node* bk_node = of_find_node_by_name(NULL, "backlight"); + + if (bk_node) { + const char *prop = of_get_property(bk_node, + "backlight-control", NULL); + if (prop && strncmp(prop, type, strlen(type)) == 0) { + of_node_put(bk_node); + return 1; + } + of_node_put(bk_node); + } + + return 0; +} + +int pmac_backlight_curve_lookup(struct fb_info *info, int value) +{ + int level = (FB_BACKLIGHT_LEVELS - 1); + + if (info && info->bl_dev) { + int i, max = 0; + + /* Look for biggest value */ + for (i = 0; i < FB_BACKLIGHT_LEVELS; i++) + max = max((int)info->bl_curve[i], max); + + /* Look for nearest value */ + for (i = 0; i < FB_BACKLIGHT_LEVELS; i++) { + int diff = abs(info->bl_curve[i] - value); + if (diff < max) { + max = diff; + level = i; + } + } + + } + + return level; +} + +static void pmac_backlight_key_worker(struct work_struct *work) +{ + if (atomic_read(&kernel_backlight_disabled)) + return; + + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + int brightness; + + props = &pmac_backlight->props; + + brightness = props->brightness + + ((pmac_backlight_key_queued?-1:1) * + (props->max_brightness / 15)); + + if (brightness < 0) + brightness = 0; + else if (brightness > props->max_brightness) + brightness = props->max_brightness; + + props->brightness = brightness; + backlight_update_status(pmac_backlight); + } + mutex_unlock(&pmac_backlight_mutex); +} + +/* This function is called in interrupt context */ +void pmac_backlight_key(int direction) +{ + if (atomic_read(&kernel_backlight_disabled)) + return; + + /* we can receive multiple interrupts here, but the scheduled work + * will run only once, with the last value + */ + pmac_backlight_key_queued = direction; + schedule_work(&pmac_backlight_key_work); +} + +static int __pmac_backlight_set_legacy_brightness(int brightness) +{ + int error = -ENXIO; + + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + + props = &pmac_backlight->props; + props->brightness = brightness * + (props->max_brightness + 1) / + (OLD_BACKLIGHT_MAX + 1); + + if (props->brightness > props->max_brightness) + props->brightness = props->max_brightness; + else if (props->brightness < 0) + props->brightness = 0; + + backlight_update_status(pmac_backlight); + + error = 0; + } + mutex_unlock(&pmac_backlight_mutex); + + return error; +} + +static void pmac_backlight_set_legacy_worker(struct work_struct *work) +{ + if (atomic_read(&kernel_backlight_disabled)) + return; + + __pmac_backlight_set_legacy_brightness(pmac_backlight_set_legacy_queued); +} + +/* This function is called in interrupt context */ +void pmac_backlight_set_legacy_brightness_pmu(int brightness) { + if (atomic_read(&kernel_backlight_disabled)) + return; + + pmac_backlight_set_legacy_queued = brightness; + schedule_work(&pmac_backlight_set_legacy_work); +} + +int pmac_backlight_set_legacy_brightness(int brightness) +{ + return __pmac_backlight_set_legacy_brightness(brightness); +} + +int pmac_backlight_get_legacy_brightness(void) +{ + int result = -ENXIO; + + mutex_lock(&pmac_backlight_mutex); + if (pmac_backlight) { + struct backlight_properties *props; + + props = &pmac_backlight->props; + + result = props->brightness * + (OLD_BACKLIGHT_MAX + 1) / + (props->max_brightness + 1); + } + mutex_unlock(&pmac_backlight_mutex); + + return result; +} + +void pmac_backlight_disable(void) +{ + atomic_inc(&kernel_backlight_disabled); +} + +void pmac_backlight_enable(void) +{ + atomic_dec(&kernel_backlight_disabled); +} + +EXPORT_SYMBOL_GPL(pmac_backlight); +EXPORT_SYMBOL_GPL(pmac_backlight_mutex); +EXPORT_SYMBOL_GPL(pmac_has_backlight_type); diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c new file mode 100644 index 0000000000..72eb99aba4 --- /dev/null +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Early boot support code for BootX bootloader + * + * Copyright (C) 2005 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG +#define SET_BOOT_BAT + +#ifdef DEBUG +#define DBG(fmt...) do { bootx_printf(fmt); } while(0) +#else +#define DBG(fmt...) do { } while(0) +#endif + +extern void __start(unsigned long r3, unsigned long r4, unsigned long r5); + +static unsigned long __initdata bootx_dt_strbase; +static unsigned long __initdata bootx_dt_strend; +static unsigned long __initdata bootx_node_chosen; +static boot_infos_t * __initdata bootx_info; +static char __initdata bootx_disp_path[256]; + +/* Is boot-info compatible ? */ +#define BOOT_INFO_IS_COMPATIBLE(bi) \ + ((bi)->compatible_version <= BOOT_INFO_VERSION) +#define BOOT_INFO_IS_V2_COMPATIBLE(bi) ((bi)->version >= 2) +#define BOOT_INFO_IS_V4_COMPATIBLE(bi) ((bi)->version >= 4) + +#ifdef CONFIG_BOOTX_TEXT +static void __init bootx_printf(const char *format, ...) +{ + const char *p, *q, *s; + va_list args; + unsigned long v; + + va_start(args, format); + for (p = format; *p != 0; p = q) { + for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) + ; + if (q > p) + btext_drawtext(p, q - p); + if (*q == 0) + break; + if (*q == '\n') { + ++q; + btext_flushline(); + btext_drawstring("\r\n"); + btext_flushline(); + continue; + } + ++q; + if (*q == 0) + break; + switch (*q) { + case 's': + ++q; + s = va_arg(args, const char *); + if (s == NULL) + s = ""; + btext_drawstring(s); + break; + case 'x': + ++q; + v = va_arg(args, unsigned long); + btext_drawhex(v); + break; + } + } + va_end(args); +} +#else /* CONFIG_BOOTX_TEXT */ +static void __init bootx_printf(const char *format, ...) {} +#endif /* CONFIG_BOOTX_TEXT */ + +static void * __init bootx_early_getprop(unsigned long base, + unsigned long node, + char *prop) +{ + struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node); + u32 *ppp = &np->properties; + + while(*ppp) { + struct bootx_dt_prop *pp = + (struct bootx_dt_prop *)(base + *ppp); + + if (strcmp((char *)((unsigned long)pp->name + base), + prop) == 0) { + return (void *)((unsigned long)pp->value + base); + } + ppp = &pp->next; + } + return NULL; +} + +#define dt_push_token(token, mem) \ + do { \ + *(mem) = ALIGN(*(mem),4); \ + *((u32 *)*(mem)) = token; \ + *(mem) += 4; \ + } while(0) + +static unsigned long __init bootx_dt_find_string(char *str) +{ + char *s, *os; + + s = os = (char *)bootx_dt_strbase; + s += 4; + while (s < (char *)bootx_dt_strend) { + if (strcmp(s, str) == 0) + return s - os; + s += strlen(s) + 1; + } + return 0; +} + +static void __init bootx_dt_add_prop(char *name, void *data, int size, + unsigned long *mem_end) +{ + unsigned long soff = bootx_dt_find_string(name); + if (data == NULL) + size = 0; + if (soff == 0) { + bootx_printf("WARNING: Can't find string index for <%s>\n", + name); + return; + } + if (size > 0x20000) { + bootx_printf("WARNING: ignoring large property "); + bootx_printf("%s length 0x%x\n", name, size); + return; + } + dt_push_token(OF_DT_PROP, mem_end); + dt_push_token(size, mem_end); + dt_push_token(soff, mem_end); + + /* push property content */ + if (size && data) { + memcpy((void *)*mem_end, data, size); + *mem_end = ALIGN(*mem_end + size, 4); + } +} + +static void __init bootx_add_chosen_props(unsigned long base, + unsigned long *mem_end) +{ + u32 val; + + bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end); + + if (bootx_info->kernelParamsOffset) { + char *args = (char *)((unsigned long)bootx_info) + + bootx_info->kernelParamsOffset; + bootx_dt_add_prop("bootargs", args, strlen(args) + 1, mem_end); + } + if (bootx_info->ramDisk) { + val = ((unsigned long)bootx_info) + bootx_info->ramDisk; + bootx_dt_add_prop("linux,initrd-start", &val, 4, mem_end); + val += bootx_info->ramDiskSize; + bootx_dt_add_prop("linux,initrd-end", &val, 4, mem_end); + } + if (strlen(bootx_disp_path)) + bootx_dt_add_prop("linux,stdout-path", bootx_disp_path, + strlen(bootx_disp_path) + 1, mem_end); +} + +static void __init bootx_add_display_props(unsigned long base, + unsigned long *mem_end, + int has_real_node) +{ + boot_infos_t *bi = bootx_info; + u32 tmp; + + if (has_real_node) { + bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end); + bootx_dt_add_prop("linux,opened", NULL, 0, mem_end); + } else + bootx_dt_add_prop("linux,bootx-noscreen", NULL, 0, mem_end); + + tmp = bi->dispDeviceDepth; + bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end); + tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0]; + bootx_dt_add_prop("linux,bootx-width", &tmp, 4, mem_end); + tmp = bi->dispDeviceRect[3] - bi->dispDeviceRect[1]; + bootx_dt_add_prop("linux,bootx-height", &tmp, 4, mem_end); + tmp = bi->dispDeviceRowBytes; + bootx_dt_add_prop("linux,bootx-linebytes", &tmp, 4, mem_end); + tmp = (u32)bi->dispDeviceBase; + if (tmp == 0) + tmp = (u32)bi->logicalDisplayBase; + tmp += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; + tmp += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8); + bootx_dt_add_prop("linux,bootx-addr", &tmp, 4, mem_end); +} + +static void __init bootx_dt_add_string(char *s, unsigned long *mem_end) +{ + unsigned int l = strlen(s) + 1; + memcpy((void *)*mem_end, s, l); + bootx_dt_strend = *mem_end = *mem_end + l; +} + +static void __init bootx_scan_dt_build_strings(unsigned long base, + unsigned long node, + unsigned long *mem_end) +{ + struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node); + u32 *cpp, *ppp = &np->properties; + unsigned long soff; + char *namep; + + /* Keep refs to known nodes */ + namep = np->full_name ? (char *)(base + np->full_name) : NULL; + if (namep == NULL) { + bootx_printf("Node without a full name !\n"); + namep = ""; + } + DBG("* strings: %s\n", namep); + + if (!strcmp(namep, "/chosen")) { + DBG(" detected /chosen ! adding properties names !\n"); + bootx_dt_add_string("linux,bootx", mem_end); + bootx_dt_add_string("linux,stdout-path", mem_end); + bootx_dt_add_string("linux,initrd-start", mem_end); + bootx_dt_add_string("linux,initrd-end", mem_end); + bootx_dt_add_string("bootargs", mem_end); + bootx_node_chosen = node; + } + if (node == bootx_info->dispDeviceRegEntryOffset) { + DBG(" detected display ! adding properties names !\n"); + bootx_dt_add_string("linux,boot-display", mem_end); + bootx_dt_add_string("linux,opened", mem_end); + strscpy(bootx_disp_path, namep, sizeof(bootx_disp_path)); + } + + /* get and store all property names */ + while (*ppp) { + struct bootx_dt_prop *pp = + (struct bootx_dt_prop *)(base + *ppp); + + namep = pp->name ? (char *)(base + pp->name) : NULL; + if (namep == NULL || strcmp(namep, "name") == 0) + goto next; + /* get/create string entry */ + soff = bootx_dt_find_string(namep); + if (soff == 0) + bootx_dt_add_string(namep, mem_end); + next: + ppp = &pp->next; + } + + /* do all our children */ + cpp = &np->child; + while(*cpp) { + np = (struct bootx_dt_node *)(base + *cpp); + bootx_scan_dt_build_strings(base, *cpp, mem_end); + cpp = &np->sibling; + } +} + +static void __init bootx_scan_dt_build_struct(unsigned long base, + unsigned long node, + unsigned long *mem_end) +{ + struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node); + u32 *cpp, *ppp = &np->properties; + char *namep, *p, *ep, *lp; + int l; + + dt_push_token(OF_DT_BEGIN_NODE, mem_end); + + /* get the node's full name */ + namep = np->full_name ? (char *)(base + np->full_name) : NULL; + if (namep == NULL) + namep = ""; + l = strlen(namep); + + DBG("* struct: %s\n", namep); + + /* Fixup an Apple bug where they have bogus \0 chars in the + * middle of the path in some properties, and extract + * the unit name (everything after the last '/'). + */ + memcpy((void *)*mem_end, namep, l + 1); + namep = (char *)*mem_end; + for (lp = p = namep, ep = namep + l; p < ep; p++) { + if (*p == '/') + lp = namep; + else if (*p != 0) + *lp++ = *p; + } + *lp = 0; + *mem_end = ALIGN((unsigned long)lp + 1, 4); + + /* get and store all properties */ + while (*ppp) { + struct bootx_dt_prop *pp = + (struct bootx_dt_prop *)(base + *ppp); + + namep = pp->name ? (char *)(base + pp->name) : NULL; + /* Skip "name" */ + if (namep == NULL || !strcmp(namep, "name")) + goto next; + /* Skip "bootargs" in /chosen too as we replace it */ + if (node == bootx_node_chosen && !strcmp(namep, "bootargs")) + goto next; + + /* push property head */ + bootx_dt_add_prop(namep, + pp->value ? (void *)(base + pp->value): NULL, + pp->length, mem_end); + next: + ppp = &pp->next; + } + + if (node == bootx_node_chosen) { + bootx_add_chosen_props(base, mem_end); + if (bootx_info->dispDeviceRegEntryOffset == 0) + bootx_add_display_props(base, mem_end, 0); + } + else if (node == bootx_info->dispDeviceRegEntryOffset) + bootx_add_display_props(base, mem_end, 1); + + /* do all our children */ + cpp = &np->child; + while(*cpp) { + np = (struct bootx_dt_node *)(base + *cpp); + bootx_scan_dt_build_struct(base, *cpp, mem_end); + cpp = &np->sibling; + } + + dt_push_token(OF_DT_END_NODE, mem_end); +} + +static unsigned long __init bootx_flatten_dt(unsigned long start) +{ + boot_infos_t *bi = bootx_info; + unsigned long mem_start, mem_end; + struct boot_param_header *hdr; + unsigned long base; + u64 *rsvmap; + + /* Start using memory after the big blob passed by BootX, get + * some space for the header + */ + mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4); + DBG("Boot params header at: %x\n", mem_start); + hdr = (struct boot_param_header *)mem_start; + mem_end += sizeof(struct boot_param_header); + rsvmap = (u64 *)(ALIGN(mem_end, 8)); + hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start; + mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64); + + /* Get base of tree */ + base = ((unsigned long)bi) + bi->deviceTreeOffset; + + /* Build string array */ + DBG("Building string array at: %x\n", mem_end); + DBG("Device Tree Base=%x\n", base); + bootx_dt_strbase = mem_end; + mem_end += 4; + bootx_dt_strend = mem_end; + bootx_scan_dt_build_strings(base, 4, &mem_end); + /* Add some strings */ + bootx_dt_add_string("linux,bootx-noscreen", &mem_end); + bootx_dt_add_string("linux,bootx-depth", &mem_end); + bootx_dt_add_string("linux,bootx-width", &mem_end); + bootx_dt_add_string("linux,bootx-height", &mem_end); + bootx_dt_add_string("linux,bootx-linebytes", &mem_end); + bootx_dt_add_string("linux,bootx-addr", &mem_end); + /* Wrap up strings */ + hdr->off_dt_strings = bootx_dt_strbase - mem_start; + hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase; + + /* Build structure */ + mem_end = ALIGN(mem_end, 16); + DBG("Building device tree structure at: %x\n", mem_end); + hdr->off_dt_struct = mem_end - mem_start; + bootx_scan_dt_build_struct(base, 4, &mem_end); + dt_push_token(OF_DT_END, &mem_end); + + /* Finish header */ + hdr->boot_cpuid_phys = 0; + hdr->magic = OF_DT_HEADER; + hdr->totalsize = mem_end - mem_start; + hdr->version = OF_DT_VERSION; + /* Version 16 is not backward compatible */ + hdr->last_comp_version = 0x10; + + /* Reserve the whole thing and copy the reserve map in, we + * also bump mem_reserve_cnt to cause further reservations to + * fail since it's too late. + */ + mem_end = ALIGN(mem_end, PAGE_SIZE); + DBG("End of boot params: %x\n", mem_end); + rsvmap[0] = mem_start; + rsvmap[1] = mem_end; + if (bootx_info->ramDisk) { + rsvmap[2] = ((unsigned long)bootx_info) + bootx_info->ramDisk; + rsvmap[3] = rsvmap[2] + bootx_info->ramDiskSize; + rsvmap[4] = 0; + rsvmap[5] = 0; + } else { + rsvmap[2] = 0; + rsvmap[3] = 0; + } + + return (unsigned long)hdr; +} + + +#ifdef CONFIG_BOOTX_TEXT +static void __init btext_welcome(boot_infos_t *bi) +{ + unsigned long flags; + unsigned long pvr; + + bootx_printf("Welcome to Linux, kernel " UTS_RELEASE "\n"); + bootx_printf("\nlinked at : 0x%x", KERNELBASE); + bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase); + bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase); + bootx_printf(" (log)"); + bootx_printf("\nklimit : 0x%x",(unsigned long)_end); + bootx_printf("\nboot_info at : 0x%x", bi); + __asm__ __volatile__ ("mfmsr %0" : "=r" (flags)); + bootx_printf("\nMSR : 0x%x", flags); + __asm__ __volatile__ ("mfspr %0, 287" : "=r" (pvr)); + bootx_printf("\nPVR : 0x%x", pvr); + pvr >>= 16; + if (pvr > 1) { + __asm__ __volatile__ ("mfspr %0, 1008" : "=r" (flags)); + bootx_printf("\nHID0 : 0x%x", flags); + } + if (pvr == 8 || pvr == 12 || pvr == 0x800c) { + __asm__ __volatile__ ("mfspr %0, 1019" : "=r" (flags)); + bootx_printf("\nICTC : 0x%x", flags); + } +#ifdef DEBUG + bootx_printf("\n\n"); + bootx_printf("bi->deviceTreeOffset : 0x%x\n", + bi->deviceTreeOffset); + bootx_printf("bi->deviceTreeSize : 0x%x\n", + bi->deviceTreeSize); +#endif + bootx_printf("\n\n"); +} +#endif /* CONFIG_BOOTX_TEXT */ + +void __init bootx_init(unsigned long r3, unsigned long r4) +{ + boot_infos_t *bi = (boot_infos_t *) r4; + unsigned long hdr; + unsigned long space; + unsigned long ptr; + char *model; + unsigned long offset = reloc_offset(); + + reloc_got2(offset); + + bootx_info = bi; + + /* We haven't cleared any bss at this point, make sure + * what we need is initialized + */ + bootx_dt_strbase = bootx_dt_strend = 0; + bootx_node_chosen = 0; + bootx_disp_path[0] = 0; + + if (!BOOT_INFO_IS_V2_COMPATIBLE(bi)) + bi->logicalDisplayBase = bi->dispDeviceBase; + + /* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */ + if (bi->dispDeviceDepth == 16) + bi->dispDeviceDepth = 15; + + +#ifdef CONFIG_BOOTX_TEXT + ptr = (unsigned long)bi->logicalDisplayBase; + ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes; + ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8); + btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0], + bi->dispDeviceRect[3] - bi->dispDeviceRect[1], + bi->dispDeviceDepth, bi->dispDeviceRowBytes, + (unsigned long)bi->logicalDisplayBase); + btext_clearscreen(); + btext_flushscreen(); +#endif /* CONFIG_BOOTX_TEXT */ + + /* + * Test if boot-info is compatible. Done only in config + * CONFIG_BOOTX_TEXT since there is nothing much we can do + * with an incompatible version, except display a message + * and eventually hang the processor... + * + * I'll try to keep enough of boot-info compatible in the + * future to always allow display of this message; + */ + if (!BOOT_INFO_IS_COMPATIBLE(bi)) { + bootx_printf(" !!! WARNING - Incompatible version" + " of BootX !!!\n\n\n"); + for (;;) + ; + } + if (bi->architecture != BOOT_ARCH_PCI) { + bootx_printf(" !!! WARNING - Unsupported machine" + " architecture !\n"); + for (;;) + ; + } + +#ifdef CONFIG_BOOTX_TEXT + btext_welcome(bi); +#endif + + /* New BootX enters kernel with MMU off, i/os are not allowed + * here. This hack will have been done by the boostrap anyway. + */ + if (bi->version < 4) { + /* + * XXX If this is an iMac, turn off the USB controller. + */ + model = (char *) bootx_early_getprop(r4 + bi->deviceTreeOffset, + 4, "model"); + if (model + && (strcmp(model, "iMac,1") == 0 + || strcmp(model, "PowerMac1,1") == 0)) { + bootx_printf("iMac,1 detected, shutting down USB\n"); + out_le32((unsigned __iomem *)0x80880008, 1); /* XXX */ + } + } + + /* Get a pointer that points above the device tree, args, ramdisk, + * etc... to use for generating the flattened tree + */ + if (bi->version < 5) { + space = bi->deviceTreeOffset + bi->deviceTreeSize; + if (bi->ramDisk >= space) + space = bi->ramDisk + bi->ramDiskSize; + } else + space = bi->totalParamsSize; + + bootx_printf("Total space used by parameters & ramdisk: 0x%x\n", space); + + /* New BootX will have flushed all TLBs and enters kernel with + * MMU switched OFF, so this should not be useful anymore. + */ + if (bi->version < 4) { + unsigned long x __maybe_unused; + + bootx_printf("Touching pages...\n"); + + /* + * Touch each page to make sure the PTEs for them + * are in the hash table - the aim is to try to avoid + * getting DSI exceptions while copying the kernel image. + */ + for (ptr = ((unsigned long) &_stext) & PAGE_MASK; + ptr < (unsigned long)bi + space; ptr += PAGE_SIZE) + x = *(volatile unsigned long *)ptr; + } + + /* Ok, now we need to generate a flattened device-tree to pass + * to the kernel + */ + bootx_printf("Preparing boot params...\n"); + + hdr = bootx_flatten_dt(space); + +#ifdef CONFIG_BOOTX_TEXT +#ifdef SET_BOOT_BAT + bootx_printf("Preparing BAT...\n"); + btext_prepare_BAT(); +#else + btext_unmap(); +#endif +#endif + + reloc_got2(-offset); + + __start(hdr, KERNELBASE + offset, 0); +} diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S new file mode 100644 index 0000000000..b8ae56e9f4 --- /dev/null +++ b/arch/powerpc/platforms/powermac/cache.S @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains low-level cache management functions + * used for sleep and CPU speed changes on Apple machines. + * (In fact the only thing that is Apple-specific is that we assume + * that we can read from ROM at physical address 0xfff00000.) + * + * Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and + * Benjamin Herrenschmidt (benh@kernel.crashing.org) + */ + +#include +#include +#include +#include + +/* + * Flush and disable all data caches (dL1, L2, L3). This is used + * when going to sleep, when doing a PMU based cpufreq transition, + * or when "offlining" a CPU on SMP machines. This code is over + * paranoid, but I've had enough issues with various CPU revs and + * bugs that I decided it was worth being over cautious + */ + +_GLOBAL(flush_disable_caches) +#ifndef CONFIG_PPC_BOOK3S_32 + blr +#else +BEGIN_FTR_SECTION + b flush_disable_745x +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +BEGIN_FTR_SECTION + b flush_disable_75x +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + b __flush_disable_L1 + +/* This is the code for G3 and 74[01]0 */ +flush_disable_75x: + mflr r10 + + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop DST streams */ +BEGIN_FTR_SECTION + PPC_DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Stop DPM */ + mfspr r8,SPRN_HID0 /* Save SPRN_HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Disp-flush L1. We have a weird problem here that I never + * totally figured out. On 750FX, using the ROM for the flush + * results in a non-working flush. We use that workaround for + * now until I finally understand what's going on. --BenH + */ + + /* ROM base by default */ + lis r4,0xfff0 + mfpvr r3 + srwi r3,r3,16 + cmplwi cr0,r3,0x7000 + bne+ 1f + /* RAM base on 750FX */ + li r4,0 +1: li r4,0x4000 + mtctr r4 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + + /* Disable / invalidate / enable L1 data */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,~(HID0_DCE | HID0_ICE) + mtspr SPRN_HID0,r3 + sync + isync + ori r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI) + sync + isync + mtspr SPRN_HID0,r3 + xori r3,r3,(HID0_DCI|HID0_ICFI) + mtspr SPRN_HID0,r3 + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r5,SPRN_L2CR + /* Set to data-only (pre-745x bit) */ + oris r3,r5,L2CR_L2DO@h + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: /* disp-flush L2. The interesting thing here is that the L2 can be + * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory + * but that is probbaly fine. We disp-flush over 4Mb to be safe + */ + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: dcbf 0,r4 + addi r4,r4,32 + bdnz 1b + sync + isync + + /* now disable L2 */ + rlwinm r5,r5,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r5 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + /* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */ + oris r4,r5,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync + + /* Wait for the invalidation to complete */ +1: mfspr r3,SPRN_L2CR + rlwinm. r0,r3,0,31,31 + bne 1b + + /* Clear L2I */ + xoris r4,r4,L2CR_L2I@h + sync + mtspr SPRN_L2CR,r4 + sync + + /* now disable the L1 data cache */ + mfspr r0,SPRN_HID0 + rlwinm r0,r0,0,~(HID0_DCE|HID0_ICE) + mtspr SPRN_HID0,r0 + sync + isync + + /* Restore HID0[DPM] to whatever it was before */ + sync + mfspr r0,SPRN_HID0 + rlwimi r0,r8,0,11,11 /* Turn back HID0[DPM] */ + mtspr SPRN_HID0,r0 + sync + + /* restore DR and EE */ + sync + mtmsr r11 + isync + + mtlr r10 + blr +_ASM_NOKPROBE_SYMBOL(flush_disable_75x) + +/* This code is for 745x processors */ +flush_disable_745x: + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop prefetch streams */ + PPC_DSSALL + sync + + /* Disable L2 prefetching */ + mfspr r0,SPRN_MSSCR0 + rlwinm r0,r0,0,0,29 + mtspr SPRN_MSSCR0,r0 + sync + isync + lis r4,0 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + + /* Due to a bug with the HW flush on some CPU revs, we occasionally + * experience data corruption. I'm adding a displacement flush along + * with a dcbf loop over a few Mb to "help". The problem isn't totally + * fixed by this in theory, but at least, in practice, I couldn't reproduce + * it even with a big hammer... + */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwz r0,0(r4) + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + + /* Flush and disable the L1 data cache */ + mfspr r6,SPRN_LDSTCR + lis r3,0xfff0 /* read from ROM for displacement flush */ + li r4,0xfe /* start with only way 0 unlocked */ + li r5,128 /* 128 lines in each way */ +1: mtctr r5 + rlwimi r6,r4,0,24,31 + mtspr SPRN_LDSTCR,r6 + sync + isync +2: lwz r0,0(r3) /* touch each cache line */ + addi r3,r3,32 + bdnz 2b + rlwinm r4,r4,1,24,30 /* move on to the next way */ + ori r4,r4,1 + cmpwi r4,0xff /* all done? */ + bne 1b + /* now unlock the L1 data cache */ + li r4,0 + rlwimi r6,r4,0,24,31 + sync + mtspr SPRN_LDSTCR,r6 + sync + isync + + /* Flush the L2 cache using the hardware assist */ + mfspr r3,SPRN_L2CR + cmpwi r3,0 /* check if it is enabled first */ + bge 4f + oris r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h + b 2f + /* When disabling/locking L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r0 /* lock the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + ori r0,r3,L2CR_L2HWF_745x + sync + mtspr SPRN_L2CR,r0 /* set the hardware flush bit */ +3: mfspr r0,SPRN_L2CR /* wait for it to go to 0 */ + andi. r0,r0,L2CR_L2HWF_745x + bne 3b + sync + rlwinm r3,r3,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 /* disable the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + oris r4,r3,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync +1: mfspr r4,SPRN_L2CR + andis. r0,r4,L2CR_L2I@h + bne 1b + sync + +BEGIN_FTR_SECTION + /* Flush the L3 cache using the hardware assist */ +4: mfspr r3,SPRN_L3CR + cmpwi r3,0 /* check if it is enabled */ + bge 6f + oris r0,r3,L3CR_L3IO@h + ori r0,r0,L3CR_L3DO + sync + mtspr SPRN_L3CR,r0 /* lock the L3 cache */ + sync + isync + ori r0,r0,L3CR_L3HWF + sync + mtspr SPRN_L3CR,r0 /* set the hardware flush bit */ +5: mfspr r0,SPRN_L3CR /* wait for it to go to zero */ + andi. r0,r0,L3CR_L3HWF + bne 5b + rlwinm r3,r3,0,~L3CR_L3E + sync + mtspr SPRN_L3CR,r3 /* disable the L3 cache */ + sync + ori r4,r3,L3CR_L3I + mtspr SPRN_L3CR,r4 +1: mfspr r4,SPRN_L3CR + andi. r0,r4,L3CR_L3I + bne 1b + sync +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + +6: mfspr r0,SPRN_HID0 /* now disable the L1 data cache */ + rlwinm r0,r0,0,~HID0_DCE + mtspr SPRN_HID0,r0 + sync + isync + mtmsr r11 /* restore DR and EE */ + isync + blr +_ASM_NOKPROBE_SYMBOL(flush_disable_745x) +#endif /* CONFIG_PPC_BOOK3S_32 */ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c new file mode 100644 index 0000000000..ae62d432db --- /dev/null +++ b/arch/powerpc/platforms/powermac/feature.c @@ -0,0 +1,3022 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au) + * Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * TODO: + * + * - Replace mdelay with some schedule loop if possible + * - Shorten some obfuscated delays on some routines (like modem + * power) + * - Refcount some clocks (see darwin) + * - Split split split... + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#undef DEBUG_FEATURE + +#ifdef DEBUG_FEATURE +#define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +#define DBG(fmt...) +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 +extern int powersave_lowspeed; +#endif + +extern int powersave_nap; +extern struct device_node *k2_skiplist[2]; + +/* + * We use a single global lock to protect accesses. Each driver has + * to take care of its own locking + */ +DEFINE_RAW_SPINLOCK(feature_lock); + +#define LOCK(flags) raw_spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) raw_spin_unlock_irqrestore(&feature_lock, flags); + + +/* + * Instance of some macio stuffs + */ +struct macio_chip macio_chips[MAX_MACIO_CHIPS]; + +struct macio_chip *macio_find(struct device_node *child, int type) +{ + while(child) { + int i; + + for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++) + if (child == macio_chips[i].of_node && + (!type || macio_chips[i].type == type)) + return &macio_chips[i]; + child = child->parent; + } + return NULL; +} +EXPORT_SYMBOL_GPL(macio_find); + +static const char *macio_names[] = +{ + "Unknown", + "Grand Central", + "OHare", + "OHareII", + "Heathrow", + "Gatwick", + "Paddington", + "Keylargo", + "Pangea", + "Intrepid", + "K2", + "Shasta", +}; + + +struct device_node *uninorth_node; +u32 __iomem *uninorth_base; + +static u32 uninorth_rev; +static int uninorth_maj; +static void __iomem *u3_ht_base; + +/* + * For each motherboard family, we have a table of functions pointers + * that handle the various features. + */ + +typedef long (*feature_call)(struct device_node *node, long param, long value); + +struct feature_table_entry { + unsigned int selector; + feature_call function; +}; + +struct pmac_mb_def +{ + const char* model_string; + const char* model_name; + int model_id; + struct feature_table_entry* features; + unsigned long board_flags; +}; +static struct pmac_mb_def pmac_mb; + +/* + * Here are the chip specific feature functions + */ + +#ifndef CONFIG_PPC64 + +static int simple_feature_tweak(struct device_node *node, int type, int reg, + u32 mask, int value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, type); + if (!macio) + return -ENODEV; + LOCK(flags); + if (value) + MACIO_BIS(reg, mask); + else + MACIO_BIC(reg, mask); + (void)MACIO_IN32(reg); + UNLOCK(flags); + + return 0; +} + +static long ohare_htw_scc_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long chan_mask; + unsigned long fcr; + unsigned long flags; + int htw, trans; + unsigned long rmask; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (of_node_name_eq(node, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (of_node_name_eq(node, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + htw = (macio->type == macio_heathrow || macio->type == macio_paddington + || macio->type == macio_gatwick); + /* On these machines, the HRW_SCC_TRANS_EN_N bit mustn't be touched */ + trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES); + if (value) { +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(1); +#endif /* CONFIG_ADB_PMU */ + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + /* Check if scc cell need enabling */ + if (!(fcr & OH_SCC_ENABLE)) { + fcr |= OH_SCC_ENABLE; + if (htw) { + /* Side effect: this will also power up the + * modem, but it's too messy to figure out on which + * ports this controls the transceiver and on which + * it controls the modem + */ + if (trans) + fcr &= ~HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + fcr |= (rmask = HRW_RESET_SCC); + MACIO_OUT32(OHARE_FCR, fcr); + } else { + fcr |= (rmask = OH_SCC_RESET); + MACIO_OUT32(OHARE_FCR, fcr); + } + UNLOCK(flags); + (void)MACIO_IN32(OHARE_FCR); + mdelay(15); + LOCK(flags); + fcr &= ~rmask; + MACIO_OUT32(OHARE_FCR, fcr); + } + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr |= OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr |= OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + macio->flags |= chan_mask; + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr &= ~OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) { + fcr &= ~OH_SCC_ENABLE; + if (htw && trans) + fcr |= HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(0); +#endif /* CONFIG_ADB_PMU */ + } + return 0; +} + +static long ohare_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_FLOPPY_ENABLE, value); +} + +static long ohare_mesh_enable(struct device_node *node, long param, long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_MESH_ENABLE, value); +} + +static long ohare_ide_enable(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + /* For some reason, setting the bit in set_initial_features() + * doesn't stick. I'm still investigating... --BenH. + */ + if (value) + simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IOBUS_ENABLE, 1); + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long ohare_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long ohare_sleep_state(struct device_node *node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE); + } else if (value == 0) { + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + + return 0; +} + +static long heathrow_modem_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1; + if (!value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + mdelay(250); + } + if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES) { + LOCK(flags); + if (value) + MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + else + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(250); + } + if (value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long heathrow_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, + HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE, + value); +} + +static long heathrow_mesh_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + LOCK(flags); + /* Set clear mesh cell enable */ + if (value) + MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE); + else + MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE); + (void)MACIO_IN32(HEATHROW_FCR); + udelay(10); + /* Set/Clear termination power */ + if (value) + MACIO_BIC(HEATHROW_MBCR, 0x04000000); + else + MACIO_BIS(HEATHROW_MBCR, 0x04000000); + (void)MACIO_IN32(HEATHROW_MBCR); + udelay(10); + UNLOCK(flags); + + return 0; +} + +static long heathrow_ide_enable(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long heathrow_ide_reset(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long heathrow_bmac_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + } else { + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static long heathrow_sound_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + /* B&W G3 and Yikes don't support that properly (the + * sound appear to never come back after being shut down). + */ + if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE || + pmac_mb.model_id == PMAC_TYPE_YIKES) + return 0; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + } else { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static u32 save_fcr[6]; +static u32 save_mbcr; +static struct dbdma_regs save_dbdma[13]; +static struct dbdma_regs save_alt_dbdma[13]; + +static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi); + save[i].cmdptr = in_le32(&chan->cmdptr); + save[i].intr_sel = in_le32(&chan->intr_sel); + save[i].br_sel = in_le32(&chan->br_sel); + save[i].wait_sel = in_le32(&chan->wait_sel); + } +} + +static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16); + while (in_le32(&chan->status) & ACTIVE) + mb(); + out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi); + out_le32(&chan->cmdptr, save[i].cmdptr); + out_le32(&chan->intr_sel, save[i].intr_sel); + out_le32(&chan->br_sel, save[i].br_sel); + out_le32(&chan->wait_sel, save[i].wait_sel); + } +} + +static void heathrow_sleep(struct macio_chip *macio, int secondary) +{ + if (secondary) { + dbdma_save(macio, save_alt_dbdma); + save_fcr[2] = MACIO_IN32(0x38); + save_fcr[3] = MACIO_IN32(0x3c); + } else { + dbdma_save(macio, save_dbdma); + save_fcr[0] = MACIO_IN32(0x38); + save_fcr[1] = MACIO_IN32(0x3c); + save_mbcr = MACIO_IN32(0x34); + /* Make sure sound is shut down */ + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + /* This seems to be necessary as well or the fan + * keeps coming up and battery drains fast */ + MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N); + /* Make sure eth is down even if module or sleep + * won't work properly */ + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET); + } + /* Make sure modem is shut down */ + MACIO_OUT8(HRW_GPIO_MODEM_RESET, + MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1); + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE); + + /* Let things settle */ + (void)MACIO_IN32(HEATHROW_FCR); +} + +static void heathrow_wakeup(struct macio_chip *macio, int secondary) +{ + if (secondary) { + MACIO_OUT32(0x38, save_fcr[2]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[3]); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_alt_dbdma); + } else { + MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[1]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x34, save_mbcr); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_dbdma); + } +} + +static long heathrow_sleep_state(struct device_node *node, long param, + long value) +{ + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + if (macio_chips[1].type == macio_gatwick) + heathrow_sleep(&macio_chips[0], 1); + heathrow_sleep(&macio_chips[0], 0); + } else if (value == 0) { + heathrow_wakeup(&macio_chips[0], 0); + if (macio_chips[1].type == macio_gatwick) + heathrow_wakeup(&macio_chips[0], 1); + } + return 0; +} + +static long core99_scc_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + unsigned long chan_mask; + u32 fcr; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (of_node_name_eq(node, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (of_node_name_eq(node, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + if (value) { + int need_reset_scc = 0; + int need_reset_irda = 0; + + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + /* Check if scc cell need enabling */ + if (!(fcr & KL0_SCC_CELL_ENABLE)) { + fcr |= KL0_SCC_CELL_ENABLE; + need_reset_scc = 1; + } + if (chan_mask & MACIO_FLAG_SCCA_ON) { + fcr |= KL0_SCCA_ENABLE; + /* Don't enable line drivers for I2S modem */ + if ((param & 0xfff) == PMAC_SCC_I2S1) + fcr &= ~KL0_SCC_A_INTF_ENABLE; + else + fcr |= KL0_SCC_A_INTF_ENABLE; + } + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr |= KL0_SCCB_ENABLE; + /* Perform irda specific inits */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_SCC_B_INTF_ENABLE; + fcr |= KL0_IRDA_ENABLE; + fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE; + fcr |= KL0_IRDA_SOURCE1_SEL; + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + need_reset_irda = 1; + } else + fcr |= KL0_SCC_B_INTF_ENABLE; + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + macio->flags |= chan_mask; + if (need_reset_scc) { + MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET); + } + if (need_reset_irda) { + MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_IRDA_RESET); + } + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~KL0_SCCA_ENABLE; + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr &= ~KL0_SCCB_ENABLE; + /* Perform irda specific clears */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_IRDA_ENABLE; + fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE); + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + } + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) { + fcr &= ~KL0_SCC_CELL_ENABLE; + MACIO_OUT32(KEYLARGO_FCR0, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); + } + return 0; +} + +static long +core99_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_keylargo) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +pangea_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_pangea && + macio_chips[0].type != macio_intrepid) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +core99_ata100_enable(struct device_node *node, long value) +{ + unsigned long flags; + struct pci_dev *pdev = NULL; + u8 pbus, pid; + int rc; + + if (uninorth_rev < 0x24) + return -ENODEV; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + if (value) { + if (pci_device_from_OF_node(node, &pbus, &pid) == 0) + pdev = pci_get_domain_bus_and_slot(0, pbus, pid); + if (pdev == NULL) + return 0; + rc = pci_enable_device(pdev); + if (rc == 0) + pci_set_master(pdev); + pci_dev_put(pdev); + if (rc) + return rc; + } + return 0; +} + +static long +core99_ide_enable(struct device_node *node, long param, long value) +{ + /* Bus ID 0 to 2 are KeyLargo based IDE, busID 3 is U2 + * based ata-100 + */ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_ENABLE, value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_ENABLE, value); + case 3: + return core99_ata100_enable(node, value); + default: + return -ENODEV; + } +} + +static long +core99_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_RESET_N, !value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long +core99_gmac_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + return 0; +} + +static long +core99_gmac_phy_reset(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */ + KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + mdelay(10); + + return 0; +} + +static long +core99_sound_chip_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Do a better probe code, screamer G4 desktops & + * iMacs can do that too, add a recalibrate in + * the driver as well + */ + if (pmac_mb.model_id == PMAC_TYPE_PISMO || + pmac_mb.model_id == PMAC_TYPE_TITANIUM) { + LOCK(flags); + if (value) + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | + KEYLARGO_GPIO_OUTOUT_DATA); + else + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_SOUND_POWER); + UNLOCK(flags); + } + return 0; +} + +static long +core99_airport_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + int state; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Hint: we allow passing of macio itself for the sake of the + * sleep code + */ + if (node != macio->of_node && + (!node->parent || node->parent != macio->of_node)) + return -ENODEV; + state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0; + if (value == state) + return 0; + if (value) { + /* This code is a reproduction of OF enable-cardslot + * and init-wireless methods, slightly hacked until + * I got it working. + */ + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + + mdelay(10); + + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xe); + UNLOCK(flags); + udelay(10); + MACIO_OUT32(0x1c000, 0); + mdelay(1); + MACIO_OUT8(0x1a3e0, 0x41); + (void)MACIO_IN8(0x1a3e0); + udelay(10); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + UNLOCK(flags); + mdelay(100); + + macio->flags |= MACIO_FLAG_AIRPORT_ON; + } else { + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + MACIO_OUT8(KL_GPIO_AIRPORT_0, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_1, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_2, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_3, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_4, 0); + (void)MACIO_IN8(KL_GPIO_AIRPORT_4); + UNLOCK(flags); + + macio->flags &= ~MACIO_FLAG_AIRPORT_ON; + } + return 0; +} + +#ifdef CONFIG_SMP +static long +core99_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, + KL_GPIO_RESET_CPU1, + KL_GPIO_RESET_CPU2, + KL_GPIO_RESET_CPU3 }; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo) + return -ENODEV; + + for_each_of_cpu_node(np) { + const u32 *rst = of_get_property(np, "soft-reset", NULL); + if (!rst) + continue; + if (param == of_get_cpu_hwid(np, 0)) { + of_node_put(np); + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + reset_io = dflt_reset_lines[param]; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +static long +core99_usb_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio; + unsigned long flags; + const char *prop; + int number; + u32 reg; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + prop = of_get_property(node, "AAPL,clock-id", NULL); + if (!prop) + return -ENODEV; + if (strncmp(prop, "usb0u048", 8) == 0) + number = 0; + else if (strncmp(prop, "usb1u148", 8) == 0) + number = 2; + else if (strncmp(prop, "usb2u248", 8) == 0) + number = 4; + else + return -ENODEV; + + /* Sorry for the brute-force locking, but this is only used during + * sleep and the timing seem to be critical + */ + LOCK(flags); + if (value) { + /* Turn ON */ + if (number == 0) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + } else if (number == 2) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + } else if (number == 4) { + MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR1); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE); + } + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number)); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1)); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(10); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0)); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1)); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(10); + } + if (macio->type == macio_intrepid) { + /* wait for clock stopped bits to clear */ + u32 test0 = 0, test1 = 0; + u32 status0, status1; + int timeout = 1000; + + UNLOCK(flags); + switch (number) { + case 0: + test0 = UNI_N_CLOCK_STOPPED_USB0; + test1 = UNI_N_CLOCK_STOPPED_USB0PCI; + break; + case 2: + test0 = UNI_N_CLOCK_STOPPED_USB1; + test1 = UNI_N_CLOCK_STOPPED_USB1PCI; + break; + case 4: + test0 = UNI_N_CLOCK_STOPPED_USB2; + test1 = UNI_N_CLOCK_STOPPED_USB2PCI; + break; + } + do { + if (--timeout <= 0) { + printk(KERN_ERR "core99_usb_enable: " + "Timeout waiting for clocks\n"); + break; + } + mdelay(1); + status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0); + status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1); + } while ((status0 & test0) | (status1 & test1)); + LOCK(flags); + } + } else { + /* Turn OFF */ + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg |= KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number); + reg |= KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(1); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(1); + } + if (number == 0) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 2) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 4) { + udelay(1); + MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR1); + } + udelay(1); + } + UNLOCK(flags); + + return 0; +} + +static long +core99_firewire_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } else { + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +core99_firewire_cable_power(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + /* Trick: we allow NULL node */ + if ((pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER) == 0) + return -ENODEV; + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 0); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); + udelay(10); + } else { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 4); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); udelay(10); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +intrepid_aack_delay_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + if (uninorth_rev < 0xd2) + return -ENODEV; + + LOCK(flags); + if (param) + UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + else + UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + UNLOCK(flags); + + return 0; +} + + +#endif /* CONFIG_PPC64 */ + +static long +core99_read_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + return MACIO_IN8(param); +} + + +static long +core99_write_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + MACIO_OUT8(param, (u8)(value & 0xff)); + return 0; +} + +#ifdef CONFIG_PPC64 +static long g5_gmac_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + mb(); + k2_skiplist[0] = NULL; + } else { + k2_skiplist[0] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_fw_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + mb(); + k2_skiplist[1] = NULL; + } else { + k2_skiplist[1] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_mpic_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct device_node *parent = of_get_parent(node); + int is_u3; + + if (parent == NULL) + return 0; + is_u3 = of_node_name_eq(parent, "u3") || of_node_name_eq(parent, "u4"); + of_node_put(parent); + if (!is_u3) + return 0; + + LOCK(flags); + UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE); + UNLOCK(flags); + + return 0; +} + +static long g5_eth_phy_reset(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + struct device_node *phy; + int need_reset; + + /* + * We must not reset the combo PHYs, only the BCM5221 found in + * the iMac G5. + */ + phy = of_get_next_child(node, NULL); + if (!phy) + return -ENODEV; + need_reset = of_device_is_compatible(phy, "B5221"); + of_node_put(phy); + if (!need_reset) + return 0; + + /* PHY reset is GPIO 29, not in device-tree unfortunately */ + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + /* Thankfully, this is now always called at a time when we can + * schedule by sungem. + */ + msleep(10); + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0); + + return 0; +} + +static long g5_i2s_enable(struct device_node *node, long param, long value) +{ + /* Very crude implementation for now */ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + int cell; + u32 fcrs[3][3] = { + { 0, + K2_FCR1_I2S0_CELL_ENABLE | + K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE, + KL3_I2S0_CLK18_ENABLE + }, + { KL0_SCC_A_INTF_ENABLE, + K2_FCR1_I2S1_CELL_ENABLE | + K2_FCR1_I2S1_CLK_ENABLE_BIT | K2_FCR1_I2S1_ENABLE, + KL3_I2S1_CLK18_ENABLE + }, + { KL0_SCC_B_INTF_ENABLE, + SH_FCR1_I2S2_CELL_ENABLE | + SH_FCR1_I2S2_CLK_ENABLE_BIT | SH_FCR1_I2S2_ENABLE, + SH_FCR3_I2S2_CLK18_ENABLE + }, + }; + + if (macio->type != macio_keylargo2 && macio->type != macio_shasta) + return -ENODEV; + if (strncmp(node->name, "i2s-", 4)) + return -ENODEV; + cell = node->name[4] - 'a'; + switch(cell) { + case 0: + case 1: + break; + case 2: + if (macio->type == macio_shasta) + break; + fallthrough; + default: + return -ENODEV; + } + + LOCK(flags); + if (value) { + MACIO_BIC(KEYLARGO_FCR0, fcrs[cell][0]); + MACIO_BIS(KEYLARGO_FCR1, fcrs[cell][1]); + MACIO_BIS(KEYLARGO_FCR3, fcrs[cell][2]); + } else { + MACIO_BIC(KEYLARGO_FCR3, fcrs[cell][2]); + MACIO_BIC(KEYLARGO_FCR1, fcrs[cell][1]); + MACIO_BIS(KEYLARGO_FCR0, fcrs[cell][0]); + } + udelay(10); + UNLOCK(flags); + + return 0; +} + + +#ifdef CONFIG_SMP +static long g5_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo2 && macio->type != macio_shasta) + return -ENODEV; + + for_each_of_cpu_node(np) { + const u32 *rst = of_get_property(np, "soft-reset", NULL); + if (!rst) + continue; + if (param == of_get_cpu_hwid(np, 0)) { + of_node_put(np); + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * This can be called from pmac_smp so isn't static + * + * This takes the second CPU off the bus on dual CPU machines + * running UP + */ +void __init g5_phy_disable_cpu1(void) +{ + if (uninorth_maj == 3) + UN_OUT(U3_API_PHY_CONFIG_1, 0); +} +#endif /* CONFIG_PPC64 */ + +#ifndef CONFIG_PPC64 + + +#ifdef CONFIG_PM +static u32 save_gpio_levels[2]; +static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT]; +static u8 save_gpio_normal[KEYLARGO_GPIO_CNT]; +static u32 save_unin_clock_ctl; + +static void keylargo_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + if (sleep_mode) { + mdelay(1); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + } + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE | + KL0_IRDA_CLK19_ENABLE); + + MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK); + MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N | + KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N | + KL1_UIDE_ENABLE); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE); + + temp = MACIO_IN32(KEYLARGO_FCR3); + if (macio->rev >= 2) { + temp |= KL3_SHUTDOWN_PLL2X; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLL_TOTAL; + } + + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLLKW12; + temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE + | KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void pangea_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_UIDE_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE + | KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void intrepid_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_EIDE0_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | + KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(10); +} + + +static int +core99_sleep(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* We power off the wireless slot in case it was not done + * by the driver. We don't power it on automatically however + */ + if (macio->flags & MACIO_FLAG_AIRPORT_ON) + core99_airport_enable(macio->of_node, 0, 0); + + /* We power off the FW cable. Should be done by the driver... */ + if (macio->flags & MACIO_FLAG_FW_SUPPORTED) { + core99_firewire_enable(NULL, 0, 0); + core99_firewire_cable_power(NULL, 0, 0); + } + + /* We make sure int. modem is off (in case driver lost it) */ + if (macio->type == macio_keylargo) + core99_modem_enable(macio->of_node, 0, 0); + else + pangea_modem_enable(macio->of_node, 0, 0); + + /* We make sure the sound is off as well */ + core99_sound_chip_enable(macio->of_node, 0, 0); + + /* + * Save various bits of KeyLargo + */ + + /* Save the state of the various GPIOs */ + save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0); + save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1); + for (i=0; itype == macio_keylargo) + save_mbcr = MACIO_IN32(KEYLARGO_MBCR); + save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0); + save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1); + save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2); + save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3); + save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4); + if (macio->type == macio_pangea || macio->type == macio_intrepid) + save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5); + + /* Save state & config of DBDMA channels */ + dbdma_save(macio, save_dbdma); + + /* + * Turn off as much as we can + */ + if (macio->type == macio_pangea) + pangea_shutdown(macio, 1); + else if (macio->type == macio_intrepid) + intrepid_shutdown(macio, 1); + else if (macio->type == macio_keylargo) + keylargo_shutdown(macio, 1); + + /* + * Put the host bridge to sleep + */ + + save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL); + /* Note: do not switch GMAC off, driver does it when necessary, WOL must keep it + * enabled ! + */ + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl & + ~(/*UNI_N_CLOCK_CNTL_GMAC|*/UNI_N_CLOCK_CNTL_FW/*|UNI_N_CLOCK_CNTL_PCI*/)); + udelay(100); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP); + mdelay(10); + + /* + * FIXME: A bit of black magic with OpenPIC (don't ask me why) + */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIS(0x506e0, 0x00400000); + MACIO_BIS(0x506e0, 0x80000000); + } + return 0; +} + +static int +core99_wake_up(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* + * Wakeup the host bridge + */ + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + + /* + * Restore KeyLargo + */ + + if (macio->type == macio_keylargo) { + MACIO_OUT32(KEYLARGO_MBCR, save_mbcr); + (void)MACIO_IN32(KEYLARGO_MBCR); udelay(10); + } + MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]); + (void)MACIO_IN32(KEYLARGO_FCR0); udelay(10); + MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]); + (void)MACIO_IN32(KEYLARGO_FCR1); udelay(10); + MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]); + (void)MACIO_IN32(KEYLARGO_FCR2); udelay(10); + MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]); + (void)MACIO_IN32(KEYLARGO_FCR3); udelay(10); + MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]); + (void)MACIO_IN32(KEYLARGO_FCR4); udelay(10); + if (macio->type == macio_pangea || macio->type == macio_intrepid) { + MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]); + (void)MACIO_IN32(KEYLARGO_FCR5); udelay(10); + } + + dbdma_restore(macio, save_dbdma); + + MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]); + MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]); + for (i=0; itype) { +#ifndef CONFIG_PPC64 + case macio_grand_central: + pmac_mb.model_id = PMAC_TYPE_PSURGE; + pmac_mb.model_name = "Unknown PowerSurge"; + break; + case macio_ohare: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE; + pmac_mb.model_name = "Unknown OHare-based"; + break; + case macio_heathrow: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW; + pmac_mb.model_name = "Unknown Heathrow-based"; + pmac_mb.features = heathrow_desktop_features; + break; + case macio_paddington: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON; + pmac_mb.model_name = "Unknown Paddington-based"; + pmac_mb.features = paddington_features; + break; + case macio_keylargo: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99; + pmac_mb.model_name = "Unknown Keylargo-based"; + pmac_mb.features = core99_features; + break; + case macio_pangea: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA; + pmac_mb.model_name = "Unknown Pangea-based"; + pmac_mb.features = pangea_features; + break; + case macio_intrepid: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID; + pmac_mb.model_name = "Unknown Intrepid-based"; + pmac_mb.features = intrepid_features; + break; +#else /* CONFIG_PPC64 */ + case macio_keylargo2: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; + pmac_mb.model_name = "Unknown K2-based"; + pmac_mb.features = g5_features; + break; + case macio_shasta: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_SHASTA; + pmac_mb.model_name = "Unknown Shasta-based"; + pmac_mb.features = g5_features; + break; +#endif /* CONFIG_PPC64 */ + default: + ret = -ENODEV; + goto done; + } +found: +#ifndef CONFIG_PPC64 + /* Fixup Hooper vs. Comet */ + if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { + u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); + if (!mach_id_ptr) { + ret = -ENODEV; + goto done; + } + /* Here, I used to disable the media-bay on comet. It + * appears this is wrong, the floppy connector is actually + * a kind of media-bay and works with the current driver. + */ + if (__raw_readl(mach_id_ptr) & 0x20000000UL) + pmac_mb.model_id = PMAC_TYPE_COMET; + iounmap(mach_id_ptr); + } + + /* Set default value of powersave_nap on machines that support it. + * It appears that uninorth rev 3 has a problem with it, we don't + * enable it on those. In theory, the flush-on-lock property is + * supposed to be set when not supported, but I'm not very confident + * that all Apple OF revs did it properly, I do it the paranoid way. + */ + if (uninorth_base && uninorth_rev > 3) { + struct device_node *np; + + for_each_of_cpu_node(np) { + int cpu_count = 1; + + /* Nap mode not supported on SMP */ + if (of_property_read_bool(np, "flush-on-lock") || + (cpu_count > 1)) { + powersave_nap = 0; + of_node_put(np); + break; + } + + cpu_count++; + powersave_nap = 1; + } + } + if (powersave_nap) + printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n"); + + /* On CPUs that support it (750FX), lowspeed by default during + * NAP mode + */ + powersave_lowspeed = 1; + +#else /* CONFIG_PPC64 */ + powersave_nap = 1; +#endif /* CONFIG_PPC64 */ + + /* Check for "mobile" machine */ + if (model && (strncmp(model, "PowerBook", 9) == 0 + || strncmp(model, "iBook", 5) == 0)) + pmac_mb.board_flags |= PMAC_MB_MOBILE; + + + printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name); +done: + of_node_put(dt); + return ret; +} + +/* Initialize the Core99 UniNorth host bridge and memory controller + */ +static void __init probe_uninorth(void) +{ + struct resource res; + unsigned long actrl; + + /* Locate core99 Uni-N */ + uninorth_node = of_find_node_by_name(NULL, "uni-n"); + uninorth_maj = 1; + + /* Locate G5 u3 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u3"); + uninorth_maj = 3; + } + /* Locate G5 u4 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u4"); + uninorth_maj = 4; + } + if (uninorth_node == NULL) { + uninorth_maj = 0; + return; + } + + if (of_address_to_resource(uninorth_node, 0, &res)) + return; + + uninorth_base = ioremap(res.start, 0x40000); + if (uninorth_base == NULL) + return; + uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); + if (uninorth_maj == 3 || uninorth_maj == 4) { + u3_ht_base = ioremap(res.start + U3_HT_CONFIG_BASE, 0x1000); + if (u3_ht_base == NULL) { + iounmap(uninorth_base); + return; + } + } + + printk(KERN_INFO "Found %s memory controller & host bridge" + " @ 0x%08x revision: 0x%02x\n", uninorth_maj == 3 ? "U3" : + uninorth_maj == 4 ? "U4" : "UniNorth", + (unsigned int)res.start, uninorth_rev); + printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); + + /* Set the arbitrer QAck delay according to what Apple does + */ + if (uninorth_rev < 0x11) { + actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK; + actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 : + UNI_N_ARB_CTRL_QACK_DELAY) << + UNI_N_ARB_CTRL_QACK_DELAY_SHIFT; + UN_OUT(UNI_N_ARB_CTRL, actrl); + } + + /* Some more magic as done by them in recent MacOS X on UniNorth + * revs 1.5 to 2.O and Pangea. Seem to toggle the UniN Maxbus/PCI + * memory timeout + */ + if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || + uninorth_rev == 0xc0) + UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff); +} + +static void __init probe_one_macio(const char *name, const char *compat, int type) +{ + struct device_node* node; + int i; + volatile u32 __iomem *base; + const u32 *addrp, *revp; + phys_addr_t addr; + u64 size; + + for_each_node_by_name(node, name) { + if (!compat) + break; + if (of_device_is_compatible(node, compat)) + break; + } + if (!node) + return; + for(i=0; i= MAX_MACIO_CHIPS) { + printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); + printk(KERN_ERR "pmac_feature: %pOF skipped\n", node); + goto out_put; + } + addrp = of_get_pci_address(node, 0, &size, NULL); + if (addrp == NULL) { + printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n", + node); + goto out_put; + } + addr = of_translate_address(node, addrp); + if (addr == 0) { + printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n", + node); + goto out_put; + } + base = ioremap(addr, (unsigned long)size); + if (!base) { + printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n", + node); + goto out_put; + } + if (type == macio_keylargo || type == macio_keylargo2) { + const u32 *did = of_get_property(node, "device-id", NULL); + if (*did == 0x00000025) + type = macio_pangea; + if (*did == 0x0000003e) + type = macio_intrepid; + if (*did == 0x0000004f) + type = macio_shasta; + } + macio_chips[i].of_node = node; + macio_chips[i].type = type; + macio_chips[i].base = base; + macio_chips[i].flags = MACIO_FLAG_SCCA_ON | MACIO_FLAG_SCCB_ON; + macio_chips[i].name = macio_names[type]; + revp = of_get_property(node, "revision-id", NULL); + if (revp) + macio_chips[i].rev = *revp; + printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", + macio_names[type], macio_chips[i].rev, macio_chips[i].base); + + return; + +out_put: + of_node_put(node); +} + +static int __init +probe_macios(void) +{ + /* Warning, ordering is important */ + probe_one_macio("gc", NULL, macio_grand_central); + probe_one_macio("ohare", NULL, macio_ohare); + probe_one_macio("pci106b,7", NULL, macio_ohareII); + probe_one_macio("mac-io", "keylargo", macio_keylargo); + probe_one_macio("mac-io", "paddington", macio_paddington); + probe_one_macio("mac-io", "gatwick", macio_gatwick); + probe_one_macio("mac-io", "heathrow", macio_heathrow); + probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2); + + /* Make sure the "main" macio chip appear first */ + if (macio_chips[0].type == macio_gatwick + && macio_chips[1].type == macio_heathrow) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + if (macio_chips[0].type == macio_ohareII + && macio_chips[1].type == macio_ohare) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + macio_chips[0].lbus.index = 0; + macio_chips[1].lbus.index = 1; + + return (macio_chips[0].of_node == NULL) ? -ENODEV : 0; +} + +static void __init +initial_serial_shutdown(struct device_node *np) +{ + int len; + const struct slot_names_prop { + int count; + char name[1]; + } *slots; + const char *conn; + int port_type = PMAC_SCC_ASYNC; + int modem = 0; + + slots = of_get_property(np, "slot-names", &len); + conn = of_get_property(np, "AAPL,connector", &len); + if (conn && (strcmp(conn, "infrared") == 0)) + port_type = PMAC_SCC_IRDA; + else if (of_device_is_compatible(np, "cobalt")) + modem = 1; + else if (slots && slots->count > 0) { + if (strcmp(slots->name, "IrDA") == 0) + port_type = PMAC_SCC_IRDA; + else if (strcmp(slots->name, "Modem") == 0) + modem = 1; + } + if (modem) + pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0); + pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, port_type, 0); +} + +static void __init +set_initial_features(void) +{ + struct device_node *np; + + /* That hack appears to be necessary for some StarMax motherboards + * but I'm not too sure it was audited for side-effects on other + * ohare based machines... + * Since I still have difficulties figuring the right way to + * differentiate them all and since that hack was there for a long + * time, I'll keep it around + */ + if (macio_chips[0].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[0]; + np = of_find_node_by_name(NULL, "via-pmu"); + if (np) + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + else + MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES); + of_node_put(np); + } else if (macio_chips[1].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[1]; + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + +#ifdef CONFIG_PPC64 + if (macio_chips[0].type == macio_keylargo2 || + macio_chips[0].type == macio_shasta) { +#ifndef CONFIG_SMP + /* On SMP machines running UP, we have the second CPU eating + * bus cycles. We need to take it off the bus. This is done + * from pmac_smp for SMP kernels running on one CPU + */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) + np = of_find_node_by_type(np, "cpu"); + if (np != NULL) { + g5_phy_disable_cpu1(); + of_node_put(np); + } +#endif /* CONFIG_SMP */ + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + for_each_node_by_name(np, "ethernet") + if (of_device_is_compatible(np, "K2-GMAC")) + g5_gmac_enable(np, 0, 1); + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + for_each_node_by_name(np, "firewire") { + if (of_device_is_compatible(np, "pci106b,5811")) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + g5_fw_enable(np, 0, 1); + } + } + } +#else /* CONFIG_PPC64 */ + + if (macio_chips[0].type == macio_keylargo || + macio_chips[0].type == macio_pangea || + macio_chips[0].type == macio_intrepid) { + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + for_each_node_by_name(np, "ethernet") { + if (np->parent + && of_device_is_compatible(np->parent, "uni-north") + && of_device_is_compatible(np, "gmac")) + core99_gmac_enable(np, 0, 1); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + for_each_node_by_name(np, "firewire") { + if (np->parent + && of_device_is_compatible(np->parent, "uni-north") + && (of_device_is_compatible(np, "pci106b,18") || + of_device_is_compatible(np, "pci106b,30") || + of_device_is_compatible(np, "pci11c1,5811"))) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + core99_firewire_enable(np, 0, 1); + } + } + + /* Enable ATA-100 before PCI probe. */ + for_each_node_by_name(np, "ata-6") { + if (np->parent + && of_device_is_compatible(np->parent, "uni-north") + && of_device_is_compatible(np, "kauai-ata")) { + core99_ata100_enable(np, 1); + } + } + + /* Switch airport off */ + for_each_node_by_name(np, "radio") { + if (np->parent == macio_chips[0].of_node) { + macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON; + core99_airport_enable(np, 0, 0); + } + } + } + + /* On all machines that support sound PM, switch sound off */ + if (macio_chips[0].of_node) + pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE, + macio_chips[0].of_node, 0, 0); + + /* While on some desktop G3s, we turn it back on */ + if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow + && (pmac_mb.model_id == PMAC_TYPE_GOSSAMER || + pmac_mb.model_id == PMAC_TYPE_SILK)) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + } + +#endif /* CONFIG_PPC64 */ + + /* On all machines, switch modem & serial ports off */ + for_each_node_by_name(np, "ch-a") + initial_serial_shutdown(np); + for_each_node_by_name(np, "ch-b") + initial_serial_shutdown(np); +} + +void __init +pmac_feature_init(void) +{ + /* Detect the UniNorth memory controller */ + probe_uninorth(); + + /* Probe mac-io controllers */ + if (probe_macios()) { + printk(KERN_WARNING "No mac-io chip found\n"); + return; + } + + /* Probe machine type */ + if (probe_motherboard()) + printk(KERN_WARNING "Unknown PowerMac !\n"); + + /* Set some initial features (turn off some chips that will + * be later turned on) + */ + set_initial_features(); +} + +#if 0 +static void dump_HT_speeds(char *name, u32 cfg, u32 frq) +{ + int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 }; + int bits[8] = { 8,16,0,32,2,4,0,0 }; + int freq = (frq >> 8) & 0xf; + + if (freqs[freq] == 0) + printk("%s: Unknown HT link frequency %x\n", name, freq); + else + printk("%s: %d MHz on main link, (%d in / %d out) bits width\n", + name, freqs[freq], + bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); +} + +void __init pmac_check_ht_link(void) +{ + u32 ufreq, freq, ucfg, cfg; + struct device_node *pcix_node; + u8 px_bus, px_devfn; + struct pci_controller *px_hose; + + (void)in_be32(u3_ht_base + U3_HT_LINK_COMMAND); + ucfg = cfg = in_be32(u3_ht_base + U3_HT_LINK_CONFIG); + ufreq = freq = in_be32(u3_ht_base + U3_HT_LINK_FREQ); + dump_HT_speeds("U3 HyperTransport", cfg, freq); + + pcix_node = of_find_compatible_node(NULL, "pci", "pci-x"); + if (pcix_node == NULL) { + printk("No PCI-X bridge found\n"); + return; + } + if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) { + printk("PCI-X bridge found but not matched to pci\n"); + return; + } + px_hose = pci_find_hose_for_OF_device(pcix_node); + if (px_hose == NULL) { + printk("PCI-X bridge found but not matched to host\n"); + return; + } + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq); + dump_HT_speeds("PCI-X HT Uplink", cfg, freq); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq); + dump_HT_speeds("PCI-X HT Downlink", cfg, freq); +} +#endif /* 0 */ + +/* + * Early video resume hook + */ + +static void (*pmac_early_vresume_proc)(void *data); +static void *pmac_early_vresume_data; + +void pmac_set_early_video_resume(void (*proc)(void *data), void *data) +{ + if (!machine_is(powermac)) + return; + preempt_disable(); + pmac_early_vresume_proc = proc; + pmac_early_vresume_data = data; + preempt_enable(); +} +EXPORT_SYMBOL(pmac_set_early_video_resume); + +void pmac_call_early_video_resume(void) +{ + if (pmac_early_vresume_proc) + pmac_early_vresume_proc(pmac_early_vresume_data); +} + +/* + * AGP related suspend/resume code + */ + +static struct pci_dev *pmac_agp_bridge; +static int (*pmac_agp_suspend)(struct pci_dev *bridge); +static int (*pmac_agp_resume)(struct pci_dev *bridge); + +void pmac_register_agp_pm(struct pci_dev *bridge, + int (*suspend)(struct pci_dev *bridge), + int (*resume)(struct pci_dev *bridge)) +{ + if (suspend || resume) { + pmac_agp_bridge = bridge; + pmac_agp_suspend = suspend; + pmac_agp_resume = resume; + return; + } + if (bridge != pmac_agp_bridge) + return; + pmac_agp_suspend = pmac_agp_resume = NULL; + return; +} +EXPORT_SYMBOL(pmac_register_agp_pm); + +void pmac_suspend_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_suspend(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_suspend_agp_for_card); + +void pmac_resume_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_resume(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_resume_agp_for_card); + +int pmac_get_uninorth_variant(void) +{ + return uninorth_maj; +} diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c new file mode 100644 index 0000000000..40f3aa432f --- /dev/null +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -0,0 +1,1514 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * arch/powerpc/platforms/powermac/low_i2c.c + * + * Copyright (C) 2003-2005 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * The linux i2c layer isn't completely suitable for our needs for various + * reasons ranging from too late initialisation to semantics not perfectly + * matching some requirements of the apple platform functions etc... + * + * This file thus provides a simple low level unified i2c interface for + * powermac that covers the various types of i2c busses used in Apple machines. + * For now, keywest, PMU and SMU, though we could add Cuda, or other bit + * banging busses found on older chipsets in earlier machines if we ever need + * one of them. + * + * The drivers in this file are synchronous/blocking. In addition, the + * keywest one is fairly slow due to the use of msleep instead of interrupts + * as the interrupt is currently used by i2c-keywest. In the long run, we + * might want to get rid of those high-level interfaces to linux i2c layer + * either completely (converting all drivers) or replacing them all with a + * single stub driver on top of this one. Once done, the interrupt will be + * available for our use. + */ + +#undef DEBUG +#undef DEBUG_LOW + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define DBG(x...) do {\ + printk(KERN_DEBUG "low_i2c:" x); \ + } while(0) +#else +#define DBG(x...) +#endif + +#ifdef DEBUG_LOW +#define DBG_LOW(x...) do {\ + printk(KERN_DEBUG "low_i2c:" x); \ + } while(0) +#else +#define DBG_LOW(x...) +#endif + + +static int pmac_i2c_force_poll = 1; + +/* + * A bus structure. Each bus in the system has such a structure associated. + */ +struct pmac_i2c_bus +{ + struct list_head link; + struct device_node *controller; + struct device_node *busnode; + int type; + int flags; + struct i2c_adapter adapter; + void *hostdata; + int channel; /* some hosts have multiple */ + int mode; /* current mode */ + struct mutex mutex; + int opened; + int polled; /* open mode */ + struct platform_device *platform_dev; + struct lock_class_key lock_key; + + /* ops */ + int (*open)(struct pmac_i2c_bus *bus); + void (*close)(struct pmac_i2c_bus *bus); + int (*xfer)(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len); +}; + +static LIST_HEAD(pmac_i2c_busses); + +/* + * Keywest implementation + */ + +struct pmac_i2c_host_kw +{ + struct mutex mutex; /* Access mutex for use by + * i2c-keywest */ + void __iomem *base; /* register base address */ + int bsteps; /* register stepping */ + int speed; /* speed */ + int irq; + u8 *data; + unsigned len; + int state; + int rw; + int polled; + int result; + struct completion complete; + spinlock_t lock; + struct timer_list timeout_timer; +}; + +/* Register indices */ +typedef enum { + reg_mode = 0, + reg_control, + reg_status, + reg_isr, + reg_ier, + reg_addr, + reg_subaddr, + reg_data +} reg_t; + +/* The Tumbler audio equalizer can be really slow sometimes */ +#define KW_POLL_TIMEOUT (2*HZ) + +/* Mode register */ +#define KW_I2C_MODE_100KHZ 0x00 +#define KW_I2C_MODE_50KHZ 0x01 +#define KW_I2C_MODE_25KHZ 0x02 +#define KW_I2C_MODE_DUMB 0x00 +#define KW_I2C_MODE_STANDARD 0x04 +#define KW_I2C_MODE_STANDARDSUB 0x08 +#define KW_I2C_MODE_COMBINED 0x0C +#define KW_I2C_MODE_MODE_MASK 0x0C +#define KW_I2C_MODE_CHAN_MASK 0xF0 + +/* Control register */ +#define KW_I2C_CTL_AAK 0x01 +#define KW_I2C_CTL_XADDR 0x02 +#define KW_I2C_CTL_STOP 0x04 +#define KW_I2C_CTL_START 0x08 + +/* Status register */ +#define KW_I2C_STAT_BUSY 0x01 +#define KW_I2C_STAT_LAST_AAK 0x02 +#define KW_I2C_STAT_LAST_RW 0x04 +#define KW_I2C_STAT_SDA 0x08 +#define KW_I2C_STAT_SCL 0x10 + +/* IER & ISR registers */ +#define KW_I2C_IRQ_DATA 0x01 +#define KW_I2C_IRQ_ADDR 0x02 +#define KW_I2C_IRQ_STOP 0x04 +#define KW_I2C_IRQ_START 0x08 +#define KW_I2C_IRQ_MASK 0x0F + +/* State machine states */ +enum { + state_idle, + state_addr, + state_read, + state_write, + state_stop, + state_dead +}; + +#define WRONG_STATE(name) do {\ + printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s " \ + "(isr: %02x)\n", \ + name, __kw_state_names[host->state], isr); \ + } while(0) + +static const char *__kw_state_names[] = { + "state_idle", + "state_addr", + "state_read", + "state_write", + "state_stop", + "state_dead" +}; + +static inline u8 __kw_read_reg(struct pmac_i2c_host_kw *host, reg_t reg) +{ + return readb(host->base + (((unsigned int)reg) << host->bsteps)); +} + +static inline void __kw_write_reg(struct pmac_i2c_host_kw *host, + reg_t reg, u8 val) +{ + writeb(val, host->base + (((unsigned)reg) << host->bsteps)); + (void)__kw_read_reg(host, reg_subaddr); +} + +#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val) +#define kw_read_reg(reg) __kw_read_reg(host, reg) + +static u8 kw_i2c_wait_interrupt(struct pmac_i2c_host_kw *host) +{ + int i, j; + u8 isr; + + for (i = 0; i < 1000; i++) { + isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; + if (isr != 0) + return isr; + + /* This code is used with the timebase frozen, we cannot rely + * on udelay nor schedule when in polled mode ! + * For now, just use a bogus loop.... + */ + if (host->polled) { + for (j = 1; j < 100000; j++) + mb(); + } else + msleep(1); + } + return isr; +} + +static void kw_i2c_do_stop(struct pmac_i2c_host_kw *host, int result) +{ + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + host->state = state_stop; + host->result = result; +} + + +static void kw_i2c_handle_interrupt(struct pmac_i2c_host_kw *host, u8 isr) +{ + u8 ack; + + DBG_LOW("kw_handle_interrupt(%s, isr: %x)\n", + __kw_state_names[host->state], isr); + + if (host->state == state_idle) { + printk(KERN_WARNING "low_i2c: Keywest got an out of state" + " interrupt, ignoring\n"); + kw_write_reg(reg_isr, isr); + return; + } + + if (isr == 0) { + printk(KERN_WARNING "low_i2c: Timeout in i2c transfer" + " on keywest !\n"); + if (host->state != state_stop) { + kw_i2c_do_stop(host, -EIO); + return; + } + ack = kw_read_reg(reg_status); + if (ack & KW_I2C_STAT_BUSY) + kw_write_reg(reg_status, 0); + host->state = state_idle; + kw_write_reg(reg_ier, 0x00); + if (!host->polled) + complete(&host->complete); + return; + } + + if (isr & KW_I2C_IRQ_ADDR) { + ack = kw_read_reg(reg_status); + if (host->state != state_addr) { + WRONG_STATE("KW_I2C_IRQ_ADDR"); + kw_i2c_do_stop(host, -EIO); + } + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + host->result = -ENXIO; + host->state = state_stop; + DBG_LOW("KW: NAK on address\n"); + } else { + if (host->len == 0) + kw_i2c_do_stop(host, 0); + else if (host->rw) { + host->state = state_read; + if (host->len > 1) + kw_write_reg(reg_control, + KW_I2C_CTL_AAK); + } else { + host->state = state_write; + kw_write_reg(reg_data, *(host->data++)); + host->len--; + } + } + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + } + + if (isr & KW_I2C_IRQ_DATA) { + if (host->state == state_read) { + *(host->data++) = kw_read_reg(reg_data); + host->len--; + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + if (host->len == 0) + host->state = state_stop; + else if (host->len == 1) + kw_write_reg(reg_control, 0); + } else if (host->state == state_write) { + ack = kw_read_reg(reg_status); + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + DBG_LOW("KW: nack on data write\n"); + host->result = -EFBIG; + host->state = state_stop; + } else if (host->len) { + kw_write_reg(reg_data, *(host->data++)); + host->len--; + } else + kw_i2c_do_stop(host, 0); + } else { + WRONG_STATE("KW_I2C_IRQ_DATA"); + if (host->state != state_stop) + kw_i2c_do_stop(host, -EIO); + } + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + } + + if (isr & KW_I2C_IRQ_STOP) { + kw_write_reg(reg_isr, KW_I2C_IRQ_STOP); + if (host->state != state_stop) { + WRONG_STATE("KW_I2C_IRQ_STOP"); + host->result = -EIO; + } + host->state = state_idle; + if (!host->polled) + complete(&host->complete); + } + + /* Below should only happen in manual mode which we don't use ... */ + if (isr & KW_I2C_IRQ_START) + kw_write_reg(reg_isr, KW_I2C_IRQ_START); + +} + +/* Interrupt handler */ +static irqreturn_t kw_i2c_irq(int irq, void *dev_id) +{ + struct pmac_i2c_host_kw *host = dev_id; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + del_timer(&host->timeout_timer); + kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr)); + if (host->state != state_idle) { + host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; + add_timer(&host->timeout_timer); + } + spin_unlock_irqrestore(&host->lock, flags); + return IRQ_HANDLED; +} + +static void kw_i2c_timeout(struct timer_list *t) +{ + struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + /* + * If the timer is pending, that means we raced with the + * irq, in which case we just return + */ + if (timer_pending(&host->timeout_timer)) + goto skip; + + kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr)); + if (host->state != state_idle) { + host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; + add_timer(&host->timeout_timer); + } + skip: + spin_unlock_irqrestore(&host->lock, flags); +} + +static int kw_i2c_open(struct pmac_i2c_bus *bus) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + mutex_lock(&host->mutex); + return 0; +} + +static void kw_i2c_close(struct pmac_i2c_bus *bus) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + mutex_unlock(&host->mutex); +} + +static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct pmac_i2c_host_kw *host = bus->hostdata; + u8 mode_reg = host->speed; + int use_irq = host->irq && !bus->polled; + + /* Setup mode & subaddress if any */ + switch(bus->mode) { + case pmac_i2c_mode_dumb: + return -EINVAL; + case pmac_i2c_mode_std: + mode_reg |= KW_I2C_MODE_STANDARD; + if (subsize != 0) + return -EINVAL; + break; + case pmac_i2c_mode_stdsub: + mode_reg |= KW_I2C_MODE_STANDARDSUB; + if (subsize != 1) + return -EINVAL; + break; + case pmac_i2c_mode_combined: + mode_reg |= KW_I2C_MODE_COMBINED; + if (subsize != 1) + return -EINVAL; + break; + } + + /* Setup channel & clear pending irqs */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + kw_write_reg(reg_mode, mode_reg | (bus->channel << 4)); + kw_write_reg(reg_status, 0); + + /* Set up address and r/w bit, strip possible stale bus number from + * address top bits + */ + kw_write_reg(reg_addr, addrdir & 0xff); + + /* Set up the sub address */ + if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB + || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED) + kw_write_reg(reg_subaddr, subaddr); + + /* Prepare for async operations */ + host->data = data; + host->len = len; + host->state = state_addr; + host->result = 0; + host->rw = (addrdir & 1); + host->polled = bus->polled; + + /* Enable interrupt if not using polled mode and interrupt is + * available + */ + if (use_irq) { + /* Clear completion */ + reinit_completion(&host->complete); + /* Ack stale interrupts */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + /* Arm timeout */ + host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT; + add_timer(&host->timeout_timer); + /* Enable emission */ + kw_write_reg(reg_ier, KW_I2C_IRQ_MASK); + } + + /* Start sending address */ + kw_write_reg(reg_control, KW_I2C_CTL_XADDR); + + /* Wait for completion */ + if (use_irq) + wait_for_completion(&host->complete); + else { + while(host->state != state_idle) { + unsigned long flags; + + u8 isr = kw_i2c_wait_interrupt(host); + spin_lock_irqsave(&host->lock, flags); + kw_i2c_handle_interrupt(host, isr); + spin_unlock_irqrestore(&host->lock, flags); + } + } + + /* Disable emission */ + kw_write_reg(reg_ier, 0); + + return host->result; +} + +static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) +{ + struct pmac_i2c_host_kw *host; + const u32 *psteps, *prate, *addrp; + u32 steps; + + host = kzalloc(sizeof(*host), GFP_KERNEL); + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n", + np); + return NULL; + } + + /* Apple is kind enough to provide a valid AAPL,address property + * on all i2c keywest nodes so far ... we would have to fallback + * to macio parsing if that wasn't the case + */ + addrp = of_get_property(np, "AAPL,address", NULL); + if (addrp == NULL) { + printk(KERN_ERR "low_i2c: Can't find address for %pOF\n", + np); + kfree(host); + return NULL; + } + mutex_init(&host->mutex); + init_completion(&host->complete); + spin_lock_init(&host->lock); + timer_setup(&host->timeout_timer, kw_i2c_timeout, 0); + + psteps = of_get_property(np, "AAPL,address-step", NULL); + steps = psteps ? (*psteps) : 0x10; + for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) + steps >>= 1; + /* Select interface rate */ + host->speed = KW_I2C_MODE_25KHZ; + prate = of_get_property(np, "AAPL,i2c-rate", NULL); + if (prate) switch(*prate) { + case 100: + host->speed = KW_I2C_MODE_100KHZ; + break; + case 50: + host->speed = KW_I2C_MODE_50KHZ; + break; + case 25: + host->speed = KW_I2C_MODE_25KHZ; + break; + } + host->irq = irq_of_parse_and_map(np, 0); + if (!host->irq) + printk(KERN_WARNING + "low_i2c: Failed to map interrupt for %pOF\n", + np); + + host->base = ioremap((*addrp), 0x1000); + if (host->base == NULL) { + printk(KERN_ERR "low_i2c: Can't map registers for %pOF\n", + np); + kfree(host); + return NULL; + } + + /* Make sure IRQ is disabled */ + kw_write_reg(reg_ier, 0); + + /* Request chip interrupt. We set IRQF_NO_SUSPEND because we don't + * want that interrupt disabled between the 2 passes of driver + * suspend or we'll have issues running the pfuncs + */ + if (request_irq(host->irq, kw_i2c_irq, IRQF_NO_SUSPEND, + "keywest i2c", host)) + host->irq = 0; + + printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %pOF\n", + *addrp, host->irq, np); + + return host; +} + + +static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, + struct device_node *controller, + struct device_node *busnode, + int channel) +{ + struct pmac_i2c_bus *bus; + + bus = kzalloc(sizeof(struct pmac_i2c_bus), GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = of_node_get(controller); + bus->busnode = of_node_get(busnode); + bus->type = pmac_i2c_bus_keywest; + bus->hostdata = host; + bus->channel = channel; + bus->mode = pmac_i2c_mode_std; + bus->open = kw_i2c_open; + bus->close = kw_i2c_close; + bus->xfer = kw_i2c_xfer; + mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); + lockdep_set_class(&bus->mutex, &bus->lock_key); + if (controller == busnode) + bus->flags = pmac_i2c_multibus; + list_add(&bus->link, &pmac_i2c_busses); + + printk(KERN_INFO " channel %d bus %s\n", channel, + (controller == busnode) ? "" : busnode->full_name); +} + +static void __init kw_i2c_probe(void) +{ + struct device_node *np, *child, *parent; + + /* Probe keywest-i2c busses */ + for_each_compatible_node(np, "i2c","keywest-i2c") { + struct pmac_i2c_host_kw *host; + int multibus; + + /* Found one, init a host structure */ + host = kw_i2c_host_init(np); + if (host == NULL) + continue; + + /* Now check if we have a multibus setup (old style) or if we + * have proper bus nodes. Note that the "new" way (proper bus + * nodes) might cause us to not create some busses that are + * kept hidden in the device-tree. In the future, we might + * want to work around that by creating busses without a node + * but not for now + */ + child = of_get_next_child(np, NULL); + multibus = !of_node_name_eq(child, "i2c-bus"); + of_node_put(child); + + /* For a multibus setup, we get the bus count based on the + * parent type + */ + if (multibus) { + int chans, i; + + parent = of_get_parent(np); + if (parent == NULL) + continue; + chans = parent->name[0] == 'u' ? 2 : 1; + of_node_put(parent); + for (i = 0; i < chans; i++) + kw_i2c_add(host, np, np, i); + } else { + for_each_child_of_node(np, child) { + const u32 *reg = of_get_property(child, + "reg", NULL); + if (reg == NULL) + continue; + kw_i2c_add(host, np, child, *reg); + } + } + } +} + + +/* + * + * PMU implementation + * + */ + +#ifdef CONFIG_ADB_PMU + +/* + * i2c command block to the PMU + */ +struct pmu_i2c_hdr { + u8 bus; + u8 mode; + u8 bus2; + u8 address; + u8 sub_addr; + u8 comb_addr; + u8 count; + u8 data[]; +}; + +static void pmu_i2c_complete(struct adb_request *req) +{ + complete(req->arg); +} + +static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct adb_request *req = bus->hostdata; + struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req->data[1]; + struct completion comp; + int read = addrdir & 1; + int retry; + int rc = 0; + + /* For now, limit ourselves to 16 bytes transfers */ + if (len > 16) + return -EINVAL; + + init_completion(&comp); + + for (retry = 0; retry < 16; retry++) { + memset(req, 0, sizeof(struct adb_request)); + hdr->bus = bus->channel; + hdr->count = len; + + switch(bus->mode) { + case pmac_i2c_mode_std: + if (subsize != 0) + return -EINVAL; + hdr->address = addrdir; + hdr->mode = PMU_I2C_MODE_SIMPLE; + break; + case pmac_i2c_mode_stdsub: + case pmac_i2c_mode_combined: + if (subsize != 1) + return -EINVAL; + hdr->address = addrdir & 0xfe; + hdr->comb_addr = addrdir; + hdr->sub_addr = subaddr; + if (bus->mode == pmac_i2c_mode_stdsub) + hdr->mode = PMU_I2C_MODE_STDSUB; + else + hdr->mode = PMU_I2C_MODE_COMBINED; + break; + default: + return -EINVAL; + } + + reinit_completion(&comp); + req->data[0] = PMU_I2C_CMD; + req->reply[0] = 0xff; + req->nbytes = sizeof(struct pmu_i2c_hdr) + 1; + req->done = pmu_i2c_complete; + req->arg = ∁ + if (!read && len) { + memcpy(hdr->data, data, len); + req->nbytes += len; + } + rc = pmu_queue_request(req); + if (rc) + return rc; + wait_for_completion(&comp); + if (req->reply[0] == PMU_I2C_STATUS_OK) + break; + msleep(15); + } + if (req->reply[0] != PMU_I2C_STATUS_OK) + return -EIO; + + for (retry = 0; retry < 16; retry++) { + memset(req, 0, sizeof(struct adb_request)); + + /* I know that looks like a lot, slow as hell, but darwin + * does it so let's be on the safe side for now + */ + msleep(15); + + hdr->bus = PMU_I2C_BUS_STATUS; + + reinit_completion(&comp); + req->data[0] = PMU_I2C_CMD; + req->reply[0] = 0xff; + req->nbytes = 2; + req->done = pmu_i2c_complete; + req->arg = ∁ + rc = pmu_queue_request(req); + if (rc) + return rc; + wait_for_completion(&comp); + + if (req->reply[0] == PMU_I2C_STATUS_OK && !read) + return 0; + if (req->reply[0] == PMU_I2C_STATUS_DATAREAD && read) { + int rlen = req->reply_len - 1; + + if (rlen != len) { + printk(KERN_WARNING "low_i2c: PMU returned %d" + " bytes, expected %d !\n", rlen, len); + return -EIO; + } + if (len) + memcpy(data, &req->reply[1], len); + return 0; + } + } + return -EIO; +} + +static void __init pmu_i2c_probe(void) +{ + struct pmac_i2c_bus *bus; + struct device_node *busnode; + int channel, sz; + + if (!pmu_present()) + return; + + /* There might or might not be a "pmu-i2c" node, we use that + * or via-pmu itself, whatever we find. I haven't seen a machine + * with separate bus nodes, so we assume a multibus setup + */ + busnode = of_find_node_by_name(NULL, "pmu-i2c"); + if (busnode == NULL) + busnode = of_find_node_by_name(NULL, "via-pmu"); + if (busnode == NULL) + return; + + printk(KERN_INFO "PMU i2c %pOF\n", busnode); + + /* + * We add bus 1 and 2 only for now, bus 0 is "special" + */ + for (channel = 1; channel <= 2; channel++) { + sz = sizeof(struct pmac_i2c_bus) + sizeof(struct adb_request); + bus = kzalloc(sz, GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = busnode; + bus->busnode = busnode; + bus->type = pmac_i2c_bus_pmu; + bus->channel = channel; + bus->mode = pmac_i2c_mode_std; + bus->hostdata = bus + 1; + bus->xfer = pmu_i2c_xfer; + mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); + lockdep_set_class(&bus->mutex, &bus->lock_key); + bus->flags = pmac_i2c_multibus; + list_add(&bus->link, &pmac_i2c_busses); + + printk(KERN_INFO " channel %d bus \n", channel); + } +} + +#endif /* CONFIG_ADB_PMU */ + + +/* + * + * SMU implementation + * + */ + +#ifdef CONFIG_PMAC_SMU + +static void smu_i2c_complete(struct smu_i2c_cmd *cmd, void *misc) +{ + complete(misc); +} + +static int smu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + struct smu_i2c_cmd *cmd = bus->hostdata; + struct completion comp; + int read = addrdir & 1; + int rc = 0; + + if ((read && len > SMU_I2C_READ_MAX) || + ((!read) && len > SMU_I2C_WRITE_MAX)) + return -EINVAL; + + memset(cmd, 0, sizeof(struct smu_i2c_cmd)); + cmd->info.bus = bus->channel; + cmd->info.devaddr = addrdir; + cmd->info.datalen = len; + + switch(bus->mode) { + case pmac_i2c_mode_std: + if (subsize != 0) + return -EINVAL; + cmd->info.type = SMU_I2C_TRANSFER_SIMPLE; + break; + case pmac_i2c_mode_stdsub: + case pmac_i2c_mode_combined: + if (subsize > 3 || subsize < 1) + return -EINVAL; + cmd->info.sublen = subsize; + /* that's big-endian only but heh ! */ + memcpy(&cmd->info.subaddr, ((char *)&subaddr) + (4 - subsize), + subsize); + if (bus->mode == pmac_i2c_mode_stdsub) + cmd->info.type = SMU_I2C_TRANSFER_STDSUB; + else + cmd->info.type = SMU_I2C_TRANSFER_COMBINED; + break; + default: + return -EINVAL; + } + if (!read && len) + memcpy(cmd->info.data, data, len); + + init_completion(&comp); + cmd->done = smu_i2c_complete; + cmd->misc = ∁ + rc = smu_queue_i2c(cmd); + if (rc < 0) + return rc; + wait_for_completion(&comp); + rc = cmd->status; + + if (read && len) + memcpy(data, cmd->info.data, len); + return rc < 0 ? rc : 0; +} + +static void __init smu_i2c_probe(void) +{ + struct device_node *controller, *busnode; + struct pmac_i2c_bus *bus; + const u32 *reg; + int sz; + + if (!smu_present()) + return; + + controller = of_find_node_by_name(NULL, "smu-i2c-control"); + if (controller == NULL) + controller = of_find_node_by_name(NULL, "smu"); + if (controller == NULL) + return; + + printk(KERN_INFO "SMU i2c %pOF\n", controller); + + /* Look for childs, note that they might not be of the right + * type as older device trees mix i2c busses and other things + * at the same level + */ + for_each_child_of_node(controller, busnode) { + if (!of_node_is_type(busnode, "i2c") && + !of_node_is_type(busnode, "i2c-bus")) + continue; + reg = of_get_property(busnode, "reg", NULL); + if (reg == NULL) + continue; + + sz = sizeof(struct pmac_i2c_bus) + sizeof(struct smu_i2c_cmd); + bus = kzalloc(sz, GFP_KERNEL); + if (bus == NULL) + return; + + bus->controller = controller; + bus->busnode = of_node_get(busnode); + bus->type = pmac_i2c_bus_smu; + bus->channel = *reg; + bus->mode = pmac_i2c_mode_std; + bus->hostdata = bus + 1; + bus->xfer = smu_i2c_xfer; + mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); + lockdep_set_class(&bus->mutex, &bus->lock_key); + bus->flags = 0; + list_add(&bus->link, &pmac_i2c_busses); + + printk(KERN_INFO " channel %x bus %pOF\n", + bus->channel, busnode); + } +} + +#endif /* CONFIG_PMAC_SMU */ + +/* + * + * Core code + * + */ + + +struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node) +{ + struct device_node *p = of_node_get(node); + struct device_node *prev = NULL; + struct pmac_i2c_bus *bus; + + while(p) { + list_for_each_entry(bus, &pmac_i2c_busses, link) { + if (p == bus->busnode) { + if (prev && bus->flags & pmac_i2c_multibus) { + const u32 *reg; + reg = of_get_property(prev, "reg", + NULL); + if (!reg) + continue; + if (((*reg) >> 8) != bus->channel) + continue; + } + of_node_put(p); + of_node_put(prev); + return bus; + } + } + of_node_put(prev); + prev = p; + p = of_get_parent(p); + } + return NULL; +} +EXPORT_SYMBOL_GPL(pmac_i2c_find_bus); + +u8 pmac_i2c_get_dev_addr(struct device_node *device) +{ + const u32 *reg = of_get_property(device, "reg", NULL); + + if (reg == NULL) + return 0; + + return (*reg) & 0xff; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_dev_addr); + +struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus) +{ + return bus->controller; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_controller); + +struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus) +{ + return bus->busnode; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_bus_node); + +int pmac_i2c_get_type(struct pmac_i2c_bus *bus) +{ + return bus->type; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_type); + +int pmac_i2c_get_flags(struct pmac_i2c_bus *bus) +{ + return bus->flags; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_flags); + +int pmac_i2c_get_channel(struct pmac_i2c_bus *bus) +{ + return bus->channel; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_channel); + + +struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus) +{ + return &bus->adapter; +} +EXPORT_SYMBOL_GPL(pmac_i2c_get_adapter); + +struct pmac_i2c_bus *pmac_i2c_adapter_to_bus(struct i2c_adapter *adapter) +{ + struct pmac_i2c_bus *bus; + + list_for_each_entry(bus, &pmac_i2c_busses, link) + if (&bus->adapter == adapter) + return bus; + return NULL; +} +EXPORT_SYMBOL_GPL(pmac_i2c_adapter_to_bus); + +int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter) +{ + struct pmac_i2c_bus *bus = pmac_i2c_find_bus(dev); + + if (bus == NULL) + return 0; + return (&bus->adapter == adapter); +} +EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter); + +int pmac_low_i2c_lock(struct device_node *np) +{ + struct pmac_i2c_bus *bus, *found = NULL; + + list_for_each_entry(bus, &pmac_i2c_busses, link) { + if (np == bus->controller) { + found = bus; + break; + } + } + if (!found) + return -ENODEV; + return pmac_i2c_open(bus, 0); +} +EXPORT_SYMBOL_GPL(pmac_low_i2c_lock); + +int pmac_low_i2c_unlock(struct device_node *np) +{ + struct pmac_i2c_bus *bus, *found = NULL; + + list_for_each_entry(bus, &pmac_i2c_busses, link) { + if (np == bus->controller) { + found = bus; + break; + } + } + if (!found) + return -ENODEV; + pmac_i2c_close(bus); + return 0; +} +EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock); + + +int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled) +{ + int rc; + + mutex_lock(&bus->mutex); + bus->polled = polled || pmac_i2c_force_poll; + bus->opened = 1; + bus->mode = pmac_i2c_mode_std; + if (bus->open && (rc = bus->open(bus)) != 0) { + bus->opened = 0; + mutex_unlock(&bus->mutex); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(pmac_i2c_open); + +void pmac_i2c_close(struct pmac_i2c_bus *bus) +{ + WARN_ON(!bus->opened); + if (bus->close) + bus->close(bus); + bus->opened = 0; + mutex_unlock(&bus->mutex); +} +EXPORT_SYMBOL_GPL(pmac_i2c_close); + +int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode) +{ + WARN_ON(!bus->opened); + + /* Report me if you see the error below as there might be a new + * "combined4" mode that I need to implement for the SMU bus + */ + if (mode < pmac_i2c_mode_dumb || mode > pmac_i2c_mode_combined) { + printk(KERN_ERR "low_i2c: Invalid mode %d requested on" + " bus %pOF !\n", mode, bus->busnode); + return -EINVAL; + } + bus->mode = mode; + + return 0; +} +EXPORT_SYMBOL_GPL(pmac_i2c_setmode); + +int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, + u32 subaddr, u8 *data, int len) +{ + int rc; + + WARN_ON(!bus->opened); + + DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x," + " %d bytes, bus %pOF\n", bus->channel, addrdir, bus->mode, subsize, + subaddr, len, bus->busnode); + + rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len); + +#ifdef DEBUG + if (rc) + DBG("xfer error %d\n", rc); +#endif + return rc; +} +EXPORT_SYMBOL_GPL(pmac_i2c_xfer); + +/* some quirks for platform function decoding */ +enum { + pmac_i2c_quirk_invmask = 0x00000001u, + pmac_i2c_quirk_skip = 0x00000002u, +}; + +static void pmac_i2c_devscan(void (*callback)(struct device_node *dev, + int quirks)) +{ + struct pmac_i2c_bus *bus; + struct device_node *np; + static struct whitelist_ent { + char *name; + char *compatible; + int quirks; + } whitelist[] = { + /* XXX Study device-tree's & apple drivers are get the quirks + * right ! + */ + /* Workaround: It seems that running the clockspreading + * properties on the eMac will cause lockups during boot. + * The machine seems to work fine without that. So for now, + * let's make sure i2c-hwclock doesn't match about "imic" + * clocks and we'll figure out if we really need to do + * something special about those later. + */ + { "i2c-hwclock", "imic5002", pmac_i2c_quirk_skip }, + { "i2c-hwclock", "imic5003", pmac_i2c_quirk_skip }, + { "i2c-hwclock", NULL, pmac_i2c_quirk_invmask }, + { "i2c-cpu-voltage", NULL, 0}, + { "temp-monitor", NULL, 0 }, + { "supply-monitor", NULL, 0 }, + { NULL, NULL, 0 }, + }; + + /* Only some devices need to have platform functions instantiated + * here. For now, we have a table. Others, like 9554 i2c GPIOs used + * on Xserve, if we ever do a driver for them, will use their own + * platform function instance + */ + list_for_each_entry(bus, &pmac_i2c_busses, link) { + for_each_child_of_node(bus->busnode, np) { + struct whitelist_ent *p; + /* If multibus, check if device is on that bus */ + if (bus->flags & pmac_i2c_multibus) + if (bus != pmac_i2c_find_bus(np)) + continue; + for (p = whitelist; p->name != NULL; p++) { + if (!of_node_name_eq(np, p->name)) + continue; + if (p->compatible && + !of_device_is_compatible(np, p->compatible)) + continue; + if (p->quirks & pmac_i2c_quirk_skip) + break; + callback(np, p->quirks); + break; + } + } + } +} + +#define MAX_I2C_DATA 64 + +struct pmac_i2c_pf_inst +{ + struct pmac_i2c_bus *bus; + u8 addr; + u8 buffer[MAX_I2C_DATA]; + u8 scratch[MAX_I2C_DATA]; + int bytes; + int quirks; +}; + +static void* pmac_i2c_do_begin(struct pmf_function *func, struct pmf_args *args) +{ + struct pmac_i2c_pf_inst *inst; + struct pmac_i2c_bus *bus; + + bus = pmac_i2c_find_bus(func->node); + if (bus == NULL) { + printk(KERN_ERR "low_i2c: Can't find bus for %pOF (pfunc)\n", + func->node); + return NULL; + } + if (pmac_i2c_open(bus, 0)) { + printk(KERN_ERR "low_i2c: Can't open i2c bus for %pOF (pfunc)\n", + func->node); + return NULL; + } + + /* XXX might need GFP_ATOMIC when called during the suspend process, + * but then, there are already lots of issues with suspending when + * near OOM that need to be resolved, the allocator itself should + * probably make GFP_NOIO implicit during suspend + */ + inst = kzalloc(sizeof(struct pmac_i2c_pf_inst), GFP_KERNEL); + if (inst == NULL) { + pmac_i2c_close(bus); + return NULL; + } + inst->bus = bus; + inst->addr = pmac_i2c_get_dev_addr(func->node); + inst->quirks = (int)(long)func->driver_data; + return inst; +} + +static void pmac_i2c_do_end(struct pmf_function *func, void *instdata) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + if (inst == NULL) + return; + pmac_i2c_close(inst->bus); + kfree(inst); +} + +static int pmac_i2c_do_read(PMF_STD_ARGS, u32 len) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + inst->bytes = len; + return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 0, 0, + inst->buffer, len); +} + +static int pmac_i2c_do_write(PMF_STD_ARGS, u32 len, const u8 *data) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0, + (u8 *)data, len); +} + +/* This function is used to do the masking & OR'ing for the "rmw" type + * callbacks. Ze should apply the mask and OR in the values in the + * buffer before writing back. The problem is that it seems that + * various darwin drivers implement the mask/or differently, thus + * we need to check the quirks first + */ +static void pmac_i2c_do_apply_rmw(struct pmac_i2c_pf_inst *inst, + u32 len, const u8 *mask, const u8 *val) +{ + int i; + + if (inst->quirks & pmac_i2c_quirk_invmask) { + for (i = 0; i < len; i ++) + inst->scratch[i] = (inst->buffer[i] & mask[i]) | val[i]; + } else { + for (i = 0; i < len; i ++) + inst->scratch[i] = (inst->buffer[i] & ~mask[i]) + | (val[i] & mask[i]); + } +} + +static int pmac_i2c_do_rmw(PMF_STD_ARGS, u32 masklen, u32 valuelen, + u32 totallen, const u8 *maskdata, + const u8 *valuedata) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + if (masklen > inst->bytes || valuelen > inst->bytes || + totallen > inst->bytes || valuelen > masklen) + return -EINVAL; + + pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata); + + return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0, + inst->scratch, totallen); +} + +static int pmac_i2c_do_read_sub(PMF_STD_ARGS, u8 subaddr, u32 len) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + inst->bytes = len; + return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 1, subaddr, + inst->buffer, len); +} + +static int pmac_i2c_do_write_sub(PMF_STD_ARGS, u8 subaddr, u32 len, + const u8 *data) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1, + subaddr, (u8 *)data, len); +} + +static int pmac_i2c_do_set_mode(PMF_STD_ARGS, int mode) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + return pmac_i2c_setmode(inst->bus, mode); +} + +static int pmac_i2c_do_rmw_sub(PMF_STD_ARGS, u8 subaddr, u32 masklen, + u32 valuelen, u32 totallen, const u8 *maskdata, + const u8 *valuedata) +{ + struct pmac_i2c_pf_inst *inst = instdata; + + if (masklen > inst->bytes || valuelen > inst->bytes || + totallen > inst->bytes || valuelen > masklen) + return -EINVAL; + + pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata); + + return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1, + subaddr, inst->scratch, totallen); +} + +static int pmac_i2c_do_mask_and_comp(PMF_STD_ARGS, u32 len, + const u8 *maskdata, + const u8 *valuedata) +{ + struct pmac_i2c_pf_inst *inst = instdata; + int i, match; + + /* Get return value pointer, it's assumed to be a u32 */ + if (!args || !args->count || !args->u[0].p) + return -EINVAL; + + /* Check buffer */ + if (len > inst->bytes) + return -EINVAL; + + for (i = 0, match = 1; match && i < len; i ++) + if ((inst->buffer[i] & maskdata[i]) != valuedata[i]) + match = 0; + *args->u[0].p = match; + return 0; +} + +static int pmac_i2c_do_delay(PMF_STD_ARGS, u32 duration) +{ + msleep((duration + 999) / 1000); + return 0; +} + + +static struct pmf_handlers pmac_i2c_pfunc_handlers = { + .begin = pmac_i2c_do_begin, + .end = pmac_i2c_do_end, + .read_i2c = pmac_i2c_do_read, + .write_i2c = pmac_i2c_do_write, + .rmw_i2c = pmac_i2c_do_rmw, + .read_i2c_sub = pmac_i2c_do_read_sub, + .write_i2c_sub = pmac_i2c_do_write_sub, + .rmw_i2c_sub = pmac_i2c_do_rmw_sub, + .set_i2c_mode = pmac_i2c_do_set_mode, + .mask_and_compare = pmac_i2c_do_mask_and_comp, + .delay = pmac_i2c_do_delay, +}; + +static void __init pmac_i2c_dev_create(struct device_node *np, int quirks) +{ + DBG("dev_create(%pOF)\n", np); + + pmf_register_driver(np, &pmac_i2c_pfunc_handlers, + (void *)(long)quirks); +} + +static void __init pmac_i2c_dev_init(struct device_node *np, int quirks) +{ + DBG("dev_create(%pOF)\n", np); + + pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_INIT, NULL); +} + +static void pmac_i2c_dev_suspend(struct device_node *np, int quirks) +{ + DBG("dev_suspend(%pOF)\n", np); + pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL); +} + +static void pmac_i2c_dev_resume(struct device_node *np, int quirks) +{ + DBG("dev_resume(%pOF)\n", np); + pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL); +} + +void pmac_pfunc_i2c_suspend(void) +{ + pmac_i2c_devscan(pmac_i2c_dev_suspend); +} + +void pmac_pfunc_i2c_resume(void) +{ + pmac_i2c_devscan(pmac_i2c_dev_resume); +} + +/* + * Initialize us: probe all i2c busses on the machine, instantiate + * busses and platform functions as needed. + */ +/* This is non-static as it might be called early by smp code */ +int __init pmac_i2c_init(void) +{ + static int i2c_inited; + + if (i2c_inited) + return 0; + i2c_inited = 1; + + /* Probe keywest-i2c busses */ + kw_i2c_probe(); + +#ifdef CONFIG_ADB_PMU + /* Probe PMU i2c busses */ + pmu_i2c_probe(); +#endif + +#ifdef CONFIG_PMAC_SMU + /* Probe SMU i2c busses */ + smu_i2c_probe(); +#endif + + /* Now add platform functions for some known devices */ + pmac_i2c_devscan(pmac_i2c_dev_create); + + return 0; +} +machine_arch_initcall(powermac, pmac_i2c_init); + +/* Since pmac_i2c_init can be called too early for the platform device + * registration, we need to do it at a later time. In our case, subsys + * happens to fit well, though I agree it's a bit of a hack... + */ +static int __init pmac_i2c_create_platform_devices(void) +{ + struct pmac_i2c_bus *bus; + int i = 0; + + /* In the case where we are initialized from smp_init(), we must + * not use the timer (and thus the irq). It's safe from now on + * though + */ + pmac_i2c_force_poll = 0; + + /* Create platform devices */ + list_for_each_entry(bus, &pmac_i2c_busses, link) { + bus->platform_dev = + platform_device_alloc("i2c-powermac", i++); + if (bus->platform_dev == NULL) + return -ENOMEM; + bus->platform_dev->dev.platform_data = bus; + bus->platform_dev->dev.of_node = bus->busnode; + platform_device_add(bus->platform_dev); + } + + /* Now call platform "init" functions */ + pmac_i2c_devscan(pmac_i2c_dev_init); + + return 0; +} +machine_subsys_initcall(powermac, pmac_i2c_create_platform_devices); diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c new file mode 100644 index 0000000000..fe2e0249cb --- /dev/null +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -0,0 +1,656 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * Todo: - add support for the OF persistent properties + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* On Core99, nvram is either a sharp, a micron or an AMD flash */ +#define SM_FLASH_STATUS_DONE 0x80 +#define SM_FLASH_STATUS_ERR 0x38 + +#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define SM_FLASH_CMD_ERASE_SETUP 0x20 +#define SM_FLASH_CMD_RESET 0xff +#define SM_FLASH_CMD_WRITE_SETUP 0x40 +#define SM_FLASH_CMD_CLEAR_STATUS 0x50 +#define SM_FLASH_CMD_READ_STATUS 0x70 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. + */ +static int nvram_naddrs; +static volatile unsigned char __iomem *nvram_data; +static int is_core_99; +static int core99_bank; +static int nvram_partitions[3]; +// XXX Turn that into a sem +static DEFINE_RAW_SPINLOCK(nv_lock); + +static int (*core99_write_bank)(int bank, u8* datas); +static int (*core99_erase_bank)(int bank); + +static char *nvram_image; + + +static unsigned char core99_nvram_read_byte(int addr) +{ + if (nvram_image == NULL) + return 0xff; + return nvram_image[addr]; +} + +static void core99_nvram_write_byte(int addr, unsigned char val) +{ + if (nvram_image == NULL) + return; + nvram_image[addr] = val; +} + +static ssize_t core99_nvram_read(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(buf, &nvram_image[i], count); + *index = i + count; + return count; +} + +static ssize_t core99_nvram_write(char *buf, size_t count, loff_t *index) +{ + int i; + + if (nvram_image == NULL) + return -ENODEV; + if (*index > NVRAM_SIZE) + return 0; + + i = *index; + if (i + count > NVRAM_SIZE) + count = NVRAM_SIZE - i; + + memcpy(&nvram_image[i], buf, count); + *index = i + count; + return count; +} + +static ssize_t core99_nvram_size(void) +{ + if (nvram_image == NULL) + return -ENODEV; + return NVRAM_SIZE; +} + +#ifdef CONFIG_PPC32 +static volatile unsigned char __iomem *nvram_addr; +static int nvram_mult; + +static ssize_t ppc32_nvram_size(void) +{ + return NVRAM_SIZE; +} + +static unsigned char direct_nvram_read_byte(int addr) +{ + return in_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]); +} + +static void direct_nvram_write_byte(int addr, unsigned char val) +{ + out_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult], val); +} + + +static unsigned char indirect_nvram_read_byte(int addr) +{ + unsigned char val; + unsigned long flags; + + raw_spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + val = in_8(&nvram_data[(addr & 0x1f) << 4]); + raw_spin_unlock_irqrestore(&nv_lock, flags); + + return val; +} + +static void indirect_nvram_write_byte(int addr, unsigned char val) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + out_8(&nvram_data[(addr & 0x1f) << 4], val); + raw_spin_unlock_irqrestore(&nv_lock, flags); +} + + +#ifdef CONFIG_ADB_PMU + +static void pmu_nvram_complete(struct adb_request *req) +{ + if (req->arg) + complete((struct completion *)req->arg); +} + +static unsigned char pmu_nvram_read_byte(int addr) +{ + struct adb_request req; + DECLARE_COMPLETION_ONSTACK(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL; + if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM, + (addr >> 8) & 0xff, addr & 0xff)) + return 0xff; + if (system_state == SYSTEM_RUNNING) + wait_for_completion(&req_complete); + while (!req.complete) + pmu_poll(); + return req.reply[0]; +} + +static void pmu_nvram_write_byte(int addr, unsigned char val) +{ + struct adb_request req; + DECLARE_COMPLETION_ONSTACK(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL; + if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM, + (addr >> 8) & 0xff, addr & 0xff, val)) + return; + if (system_state == SYSTEM_RUNNING) + wait_for_completion(&req_complete); + while (!req.complete) + pmu_poll(); +} + +#endif /* CONFIG_ADB_PMU */ +#endif /* CONFIG_PPC32 */ + +static u8 chrp_checksum(struct chrp_header* hdr) +{ + u8 *ptr; + u16 sum = hdr->signature; + for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++) + sum += *ptr; + while (sum > 0xFF) + sum = (sum & 0xFF) + (sum>>8); + return sum; +} + +static u32 core99_calc_adler(u8 *buffer) +{ + int cnt; + u32 low, high; + + buffer += CORE99_ADLER_START; + low = 1; + high = 0; + for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) { + if ((cnt % 5000) == 0) { + high %= 65521UL; + high %= 65521UL; + } + low += buffer[cnt]; + high += low; + } + low %= 65521UL; + high %= 65521UL; + + return (high << 16) | low; +} + +static u32 __init core99_check(u8 *datas) +{ + struct core99_header* hdr99 = (struct core99_header*)datas; + + if (hdr99->hdr.signature != CORE99_SIGNATURE) { + DBG("Invalid signature\n"); + return 0; + } + if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) { + DBG("Invalid checksum\n"); + return 0; + } + if (hdr99->adler != core99_calc_adler(datas)) { + DBG("Invalid adler\n"); + return 0; + } + return hdr99->generation; +} + +static int sm_erase_bank(int bank) +{ + int stat; + unsigned long timeout; + + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank); + + out_8(base, SM_FLASH_CMD_ERASE_SETUP); + out_8(base, SM_FLASH_CMD_ERASE_CONFIRM); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Micron flash erase timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + + if (memchr_inv(base, 0xff, NVRAM_SIZE)) { + printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n"); + return -ENXIO; + } + return 0; +} + +static int sm_write_bank(int bank, u8* datas) +{ + int i, stat = 0; + unsigned long timeout; + + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Writing bank %d...\n", bank); + + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + if (!(stat & SM_FLASH_STATUS_DONE)) + break; + } + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + if (memcmp(base, datas, NVRAM_SIZE)) { + printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n"); + return -ENXIO; + } + return 0; +} + +static int amd_erase_bank(int bank) +{ + int stat = 0; + unsigned long timeout; + + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: AMD Erasing bank %d...\n", bank); + + /* Unlock 1 */ + out_8(base+0x555, 0xaa); + udelay(1); + /* Unlock 2 */ + out_8(base+0x2aa, 0x55); + udelay(1); + + /* Sector-Erase */ + out_8(base+0x555, 0x80); + udelay(1); + out_8(base+0x555, 0xaa); + udelay(1); + out_8(base+0x2aa, 0x55); + udelay(1); + out_8(base, 0x30); + udelay(1); + + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: AMD flash erase timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + if (memchr_inv(base, 0xff, NVRAM_SIZE)) { + printk(KERN_ERR "nvram: AMD flash erase failed !\n"); + return -ENXIO; + } + return 0; +} + +static int amd_write_bank(int bank, u8* datas) +{ + int i, stat = 0; + unsigned long timeout; + + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: AMD Writing bank %d...\n", bank); + + for (i=0; i 1000000) { + printk(KERN_ERR "nvram: AMD flash write timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + if (stat != 0) + break; + } + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + if (memcmp(base, datas, NVRAM_SIZE)) { + printk(KERN_ERR "nvram: AMD flash write failed !\n"); + return -ENXIO; + } + return 0; +} + +static void __init lookup_partitions(void) +{ + u8 buffer[17]; + int i, offset; + struct chrp_header* hdr; + + if (pmac_newworld) { + nvram_partitions[pmac_nvram_OF] = -1; + nvram_partitions[pmac_nvram_XPRAM] = -1; + nvram_partitions[pmac_nvram_NR] = -1; + hdr = (struct chrp_header *)buffer; + + offset = 0; + buffer[16] = 0; + do { + for (i=0;i<16;i++) + buffer[i] = ppc_md.nvram_read_val(offset+i); + if (!strcmp(hdr->name, "common")) + nvram_partitions[pmac_nvram_OF] = offset + 0x10; + if (!strcmp(hdr->name, "APL,MacOS75")) { + nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10; + nvram_partitions[pmac_nvram_NR] = offset + 0x110; + } + offset += (hdr->len * 0x10); + } while(offset < NVRAM_SIZE); + } else { + nvram_partitions[pmac_nvram_OF] = 0x1800; + nvram_partitions[pmac_nvram_XPRAM] = 0x1300; + nvram_partitions[pmac_nvram_NR] = 0x1400; + } + DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]); + DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]); + DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]); +} + +static void core99_nvram_sync(void) +{ + struct core99_header* hdr99; + unsigned long flags; + + if (!is_core_99 || !nvram_data || !nvram_image) + return; + + raw_spin_lock_irqsave(&nv_lock, flags); + if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE, + NVRAM_SIZE)) + goto bail; + + DBG("Updating nvram...\n"); + + hdr99 = (struct core99_header*)nvram_image; + hdr99->generation++; + hdr99->hdr.signature = CORE99_SIGNATURE; + hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr); + hdr99->adler = core99_calc_adler(nvram_image); + core99_bank = core99_bank ? 0 : 1; + if (core99_erase_bank) + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + goto bail; + } + if (core99_write_bank) + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); + bail: + raw_spin_unlock_irqrestore(&nv_lock, flags); + +#ifdef DEBUG + mdelay(2000); +#endif +} + +static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) +{ + int i; + u32 gen_bank0, gen_bank1; + + if (nvram_naddrs < 1) { + printk(KERN_ERR "nvram: no address\n"); + return -EINVAL; + } + nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES); + if (!nvram_image) + panic("%s: Failed to allocate %u bytes\n", __func__, + NVRAM_SIZE); + nvram_data = ioremap(addr, NVRAM_SIZE*2); + nvram_naddrs = 1; /* Make sure we get the correct case */ + + DBG("nvram: Checking bank 0...\n"); + + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0; + + DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + DBG("nvram: Active bank is: %d\n", core99_bank); + + for (i=0; i 0x100) + return 0xff; + + return ppc_md.nvram_read_val(xpaddr + offset); +} + +void pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = pmac_get_partition(pmac_nvram_XPRAM); + + if (offset < 0 || xpaddr < 0 || xpaddr > 0x100) + return; + + ppc_md.nvram_write_val(xpaddr + offset, data); +} + +EXPORT_SYMBOL(pmac_get_partition); +EXPORT_SYMBOL(pmac_xpram_read); +EXPORT_SYMBOL(pmac_xpram_write); diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c new file mode 100644 index 0000000000..d71359b533 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pci.c @@ -0,0 +1,1261 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support for PCI bridges found on Power Macintoshes. + * + * Copyright (C) 2003-2005 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +/* XXX Could be per-controller, but I don't think we risk anything by + * assuming we won't have both UniNorth and Bandit */ +static int has_uninorth; +#ifdef CONFIG_PPC64 +static struct pci_controller *u3_agp; +#else +static int has_second_ohare; +#endif /* CONFIG_PPC64 */ + +extern int pcibios_assign_bus_offset; + +struct device_node *k2_skiplist[2]; + +/* + * Magic constants for enabling cache coherency in the bandit/PSX bridge. + */ +#define BANDIT_DEVID_2 8 +#define BANDIT_REVID 3 + +#define BANDIT_DEVNUM 11 +#define BANDIT_MAGIC 0x50 +#define BANDIT_COHERENT 0x40 + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node; node = node->sibling) { + const int * bus_range; + const unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = of_get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = of_get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int *bus_range, len; + struct property *prop; + + /* Lookup the "bus-range" property for the hose */ + prop = of_find_property(bridge, "bus-range", &len); + if (prop == NULL || prop->length < 2 * sizeof(int)) + return; + + bus_range = prop->value; + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + +/* + * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers. + * + * The "Bandit" version is present in all early PCI PowerMacs, + * and up to the first ones using Grackle. Some machines may + * have 2 bandit controllers (2 PCI busses). + * + * "Chaos" is used in some "Bandit"-type machines as a bridge + * for the separate display bus. It is accessed the same + * way as bandit, but cannot be probed for devices. It therefore + * has its own config access functions. + * + * The "UniNorth" version is present in all Core99 machines + * (iBook, G4, new IMacs, and all the recent Apple machines). + * It contains 3 controllers in one ASIC. + * + * The U3 is the bridge used on G5 machines. It contains an + * AGP bus which is dealt with the old UniNorth access routines + * and a HyperTransport bus which uses its own set of access + * functions. + */ + +#define MACRISC_CFA0(devfn, off) \ + ((1 << (unsigned int)PCI_SLOT(dev_fn)) \ + | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned int)(off)) & 0xFCUL)) + +#define MACRISC_CFA1(bus, devfn, off) \ + ((((unsigned int)(bus)) << 16) \ + |(((unsigned int)(devfn)) << 8) \ + |(((unsigned int)(off)) & 0xFCUL) \ + |1UL) + +static void __iomem *macrisc_cfg_map_bus(struct pci_bus *bus, + unsigned int dev_fn, + int offset) +{ + unsigned int caddr; + struct pci_controller *hose; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return NULL; + + if (bus->number == hose->first_busno) { + if (dev_fn < (11 << 3)) + return NULL; + caddr = MACRISC_CFA0(dev_fn, offset); + } else + caddr = MACRISC_CFA1(bus->number, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= has_uninorth ? 0x07 : 0x03; + return hose->cfg_data + offset; +} + +static struct pci_ops macrisc_pci_ops = +{ + .map_bus = macrisc_cfg_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +#ifdef CONFIG_PPC32 +/* + * Verify that a specific (bus, dev_fn) exists on chaos + */ +static void __iomem *chaos_map_bus(struct pci_bus *bus, unsigned int devfn, + int offset) +{ + struct device_node *np; + const u32 *vendor, *device; + + if (offset >= 0x100) + return NULL; + np = of_pci_find_child_device(bus->dev.of_node, devfn); + if (np == NULL) + return NULL; + + vendor = of_get_property(np, "vendor-id", NULL); + device = of_get_property(np, "device-id", NULL); + if (vendor == NULL || device == NULL) + return NULL; + + if ((*vendor == 0x106b) && (*device == 3) && (offset >= 0x10) + && (offset != 0x14) && (offset != 0x18) && (offset <= 0x24)) + return NULL; + + return macrisc_cfg_map_bus(bus, devfn, offset); +} + +static struct pci_ops chaos_pci_ops = +{ + .map_bus = chaos_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void __init setup_chaos(struct pci_controller *hose, + struct resource *addr) +{ + /* assume a `chaos' bridge */ + hose->ops = &chaos_pci_ops; + hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000); +} +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +/* + * These versions of U3 HyperTransport config space access ops do not + * implement self-view of the HT host yet + */ + +/* + * This function deals with some "special cases" devices. + * + * 0 -> No special case + * 1 -> Skip the device but act as if the access was successful + * (return 0xff's on reads, eventually, cache config space + * accesses in a later version) + * -1 -> Hide the device (unsuccessful access) + */ +static int u3_ht_skip_device(struct pci_controller *hose, + struct pci_bus *bus, unsigned int devfn) +{ + struct device_node *busdn, *dn; + int i; + + /* We only allow config cycles to devices that are in OF device-tree + * as we are apparently having some weird things going on with some + * revs of K2 on recent G5s, except for the host bridge itself, which + * is missing from the tree but we know we can probe. + */ + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else if (devfn == 0) + return 0; + else + busdn = hose->dn; + for (dn = busdn->child; dn; dn = dn->sibling) + if (PCI_DN(dn) && PCI_DN(dn)->devfn == devfn) + break; + if (dn == NULL) + return -1; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. + */ + for (i=0; i<2; i++) + if (k2_skiplist[i] == dn) + return 1; + + return 0; +} + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned int)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned int)bus) << 16) \ + + 0x01000000UL) + +static void __iomem *u3_ht_cfg_access(struct pci_controller *hose, u8 bus, + u8 devfn, u8 offset, int *swap) +{ + *swap = 1; + if (bus == hose->first_busno) { + if (devfn != 0) + return hose->cfg_data + U3_HT_CFA0(devfn, offset); + *swap = 0; + return ((void __iomem *)hose->cfg_addr) + (offset << 2); + } else + return hose->cfg_data + U3_HT_CFA1(bus, devfn, offset); +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + void __iomem *addr; + int swap; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + switch (len) { + case 1: + *val = 0xff; break; + case 2: + *val = 0xffff; break; + default: + *val = 0xfffffffful; break; + } + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8(addr); + break; + case 2: + *val = swap ? in_le16(addr) : in_be16(addr); + break; + default: + *val = swap ? in_le32(addr) : in_be32(addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + void __iomem *addr; + int swap; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x100) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8(addr, val); + break; + case 2: + swap ? out_le16(addr, val) : out_be16(addr, val); + break; + default: + swap ? out_le32(addr, val) : out_be32(addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + .read = u3_ht_read_config, + .write = u3_ht_write_config, +}; + +#define U4_PCIE_CFA0(devfn, off) \ + ((1 << ((unsigned int)PCI_SLOT(dev_fn))) \ + | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \ + | ((((unsigned int)(off)) >> 8) << 28) \ + | (((unsigned int)(off)) & 0xfcU)) + +#define U4_PCIE_CFA1(bus, devfn, off) \ + ((((unsigned int)(bus)) << 16) \ + |(((unsigned int)(devfn)) << 8) \ + | ((((unsigned int)(off)) >> 8) << 28) \ + |(((unsigned int)(off)) & 0xfcU) \ + |1UL) + +static void __iomem *u4_pcie_cfg_map_bus(struct pci_bus *bus, + unsigned int dev_fn, + int offset) +{ + struct pci_controller *hose; + unsigned int caddr; + + if (offset >= 0x1000) + return NULL; + + hose = pci_bus_to_host(bus); + if (!hose) + return NULL; + + if (bus->number == hose->first_busno) { + caddr = U4_PCIE_CFA0(dev_fn, offset); + } else + caddr = U4_PCIE_CFA1(bus->number, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= 0x03; + return hose->cfg_data + offset; +} + +static struct pci_ops u4_pcie_pci_ops = +{ + .map_bus = u4_pcie_cfg_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + +static void pmac_pci_fixup_u4_of_node(struct pci_dev *dev) +{ + /* Apple's device-tree "hides" the root complex virtual P2P bridge + * on U4. However, Linux sees it, causing the PCI <-> OF matching + * code to fail to properly match devices below it. This works around + * it by setting the node of the bridge to point to the PHB node, + * which is not entirely correct but fixes the matching code and + * doesn't break anything else. It's also the simplest possible fix. + */ + if (dev->dev.of_node == NULL) + dev->dev.of_node = pcibios_get_phb_of_node(dev->bus); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, 0x5b, pmac_pci_fixup_u4_of_node); + +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_PPC32 +/* + * For a bandit bridge, turn on cache coherency if necessary. + * N.B. we could clean this up using the hose ops directly. + */ +static void __init init_bandit(struct pci_controller *bp) +{ + unsigned int vendev, magic; + int rev; + + /* read the word at offset 0 in config space for device 11 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID); + udelay(2); + vendev = in_le32(bp->cfg_data); + if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) + + PCI_VENDOR_ID_APPLE) { + /* read the revision id */ + out_le32(bp->cfg_addr, + (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID); + udelay(2); + rev = in_8(bp->cfg_data); + if (rev != BANDIT_REVID) + printk(KERN_WARNING + "Unknown revision %d for bandit\n", rev); + } else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) { + printk(KERN_WARNING "bandit isn't? (%x)\n", vendev); + return; + } + + /* read the word at offset 0x50 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC); + udelay(2); + magic = in_le32(bp->cfg_data); + if ((magic & BANDIT_COHERENT) != 0) + return; + magic |= BANDIT_COHERENT; + udelay(2); + out_le32(bp->cfg_data, magic); + printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n"); +} + +/* + * Tweak the PCI-PCI bridge chip on the blue & white G3s. + */ +static void __init init_p2pbridge(void) +{ + struct device_node *p2pbridge; + struct pci_controller* hose; + u8 bus, devfn; + u16 val; + + /* XXX it would be better here to identify the specific + PCI-PCI bridge chip we have. */ + p2pbridge = of_find_node_by_name(NULL, "pci-bridge"); + if (p2pbridge == NULL || !of_node_name_eq(p2pbridge->parent, "pci")) + goto done; + if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) { + DBG("Can't find PCI infos for PCI<->PCI bridge\n"); + goto done; + } + /* Warning: At this point, we have not yet renumbered all busses. + * So we must use OF walking to find out hose + */ + hose = pci_find_hose_for_OF_device(p2pbridge); + if (!hose) { + DBG("Can't find hose for PCI<->PCI bridge\n"); + goto done; + } + if (early_read_config_word(hose, bus, devfn, + PCI_BRIDGE_CONTROL, &val) < 0) { + printk(KERN_ERR "init_p2pbridge: couldn't read bridge" + " control\n"); + goto done; + } + val &= ~PCI_BRIDGE_CTL_MASTER_ABORT; + early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val); +done: + of_node_put(p2pbridge); +} + +static void __init init_second_ohare(void) +{ + struct device_node *np = of_find_node_by_name(NULL, "pci106b,7"); + unsigned char bus, devfn; + unsigned short cmd; + + if (np == NULL) + return; + + /* This must run before we initialize the PICs since the second + * ohare hosts a PIC that will be accessed there. + */ + if (pci_device_from_OF_node(np, &bus, &devfn) == 0) { + struct pci_controller* hose = + pci_find_hose_for_OF_device(np); + if (!hose) { + printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); + of_node_put(np); + return; + } + early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + cmd &= ~PCI_COMMAND_IO; + early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); + } + has_second_ohare = 1; + of_node_put(np); +} + +/* + * Some Apple desktop machines have a NEC PD720100A USB2 controller + * on the motherboard. Open Firmware, on these, will disable the + * EHCI part of it so it behaves like a pair of OHCI's. This fixup + * code re-enables it ;) + */ +static void __init fixup_nec_usb2(void) +{ + struct device_node *nec; + + for_each_node_by_name(nec, "usb") { + struct pci_controller *hose; + u32 data; + const u32 *prop; + u8 bus, devfn; + + prop = of_get_property(nec, "vendor-id", NULL); + if (prop == NULL) + continue; + if (0x1033 != *prop) + continue; + prop = of_get_property(nec, "device-id", NULL); + if (prop == NULL) + continue; + if (0x0035 != *prop) + continue; + prop = of_get_property(nec, "reg", NULL); + if (prop == NULL) + continue; + devfn = (prop[0] >> 8) & 0xff; + bus = (prop[0] >> 16) & 0xff; + if (PCI_FUNC(devfn) != 0) + continue; + hose = pci_find_hose_for_OF_device(nec); + if (!hose) + continue; + early_read_config_dword(hose, bus, devfn, 0xe4, &data); + if (data & 1UL) { + printk("Found NEC PD720100A USB2 chip with disabled" + " EHCI, fixing up...\n"); + data &= ~1UL; + early_write_config_dword(hose, bus, devfn, 0xe4, data); + } + } +} + +static void __init setup_bandit(struct pci_controller *hose, + struct resource *addr) +{ + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000); + init_bandit(hose); +} + +static int __init setup_uninorth(struct pci_controller *hose, + struct resource *addr) +{ + pci_add_flags(PCI_REASSIGN_ALL_BUS); + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000); + /* We "know" that the bridge at f2000000 has the PCI slots. */ + return addr->start == 0xf2000000; +} +#endif /* CONFIG_PPC32 */ + +#ifdef CONFIG_PPC64 +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + u3_agp = hose; +} + +static void __init setup_u4_pcie(struct pci_controller* hose) +{ + /* We currently only implement the "non-atomic" config space, to + * be optimised later. + */ + hose->ops = &u4_pcie_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + /* The bus contains a bridge from root -> device, we need to + * make it visible on bus 0 so that we pick the right type + * of config cycles. If we didn't, we would have to force all + * config cycles to be type 1. So we override the "bus-range" + * property here + */ + hose->first_busno = 0x00; + hose->last_busno = 0xff; +} + +static void __init parse_region_decode(struct pci_controller *hose, + u32 decode) +{ + unsigned long base, end, next = -1; + int i, cur = -1; + + /* Iterate through all bits. We ignore the last bit as this region is + * reserved for the ROM among other niceties + */ + for (i = 0; i < 31; i++) { + if ((decode & (0x80000000 >> i)) == 0) + continue; + if (i < 16) { + base = 0xf0000000 | (((u32)i) << 24); + end = base + 0x00ffffff; + } else { + base = ((u32)i-16) << 28; + end = base + 0x0fffffff; + } + if (base != next) { + if (++cur >= 3) { + printk(KERN_WARNING "PCI: Too many ranges !\n"); + break; + } + hose->mem_resources[cur].flags = IORESOURCE_MEM; + hose->mem_resources[cur].name = hose->dn->full_name; + hose->mem_resources[cur].start = base; + hose->mem_resources[cur].end = end; + hose->mem_offset[cur] = 0; + DBG(" %d: 0x%08lx-0x%08lx\n", cur, base, end); + } else { + DBG(" : -0x%08lx\n", end); + hose->mem_resources[cur].end = end; + } + next = end + 1; + } +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + struct device_node *np = hose->dn; + struct resource cfg_res, self_res; + u32 decode; + + hose->ops = &u3_ht_pci_ops; + + /* Get base addresses from OF tree + */ + if (of_address_to_resource(np, 0, &cfg_res) || + of_address_to_resource(np, 1, &self_res)) { + printk(KERN_ERR "PCI: Failed to get U3/U4 HT resources !\n"); + return; + } + + /* Map external cfg space access into cfg_data and self registers + * into cfg_addr + */ + hose->cfg_data = ioremap(cfg_res.start, 0x02000000); + hose->cfg_addr = ioremap(self_res.start, resource_size(&self_res)); + + /* + * /ht node doesn't expose a "ranges" property, we read the register + * that controls the decoding logic and use that for memory regions. + * The IO region is hard coded since it is fixed in HW as well. + */ + hose->io_base_phys = 0xf4000000; + hose->pci_io_size = 0x00400000; + hose->io_resource.name = np->full_name; + hose->io_resource.start = 0; + hose->io_resource.end = 0x003fffff; + hose->io_resource.flags = IORESOURCE_IO; + hose->first_busno = 0; + hose->last_busno = 0xef; + + /* Note: fix offset when cfg_addr becomes a void * */ + decode = in_be32(hose->cfg_addr + 0x80); + + DBG("PCI: Apple HT bridge decode register: 0x%08x\n", decode); + + /* NOTE: The decode register setup is a bit weird... region + * 0xf8000000 for example is marked as enabled in there while it's + & actually the memory controller registers. + * That means that we are incorrectly attributing it to HT. + * + * In a similar vein, region 0xf4000000 is actually the HT IO space but + * also marked as enabled in here and 0xf9000000 is used by some other + * internal bits of the northbridge. + * + * Unfortunately, we can't just mask out those bit as we would end + * up with more regions than we can cope (linux can only cope with + * 3 memory regions for a PHB at this stage). + * + * So for now, we just do a little hack. We happen to -know- that + * Apple firmware doesn't assign things below 0xfa000000 for that + * bridge anyway so we mask out all bits we don't want. + */ + decode &= 0x003fffff; + + /* Now parse the resulting bits and build resources */ + parse_region_decode(hose, decode); +} +#endif /* CONFIG_PPC64 */ + +/* + * We assume that if we have a G3 powermac, we have one bridge called + * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise, + * if we have one or more bandit or chaos bridges, we don't have a MPC106. + */ +static int __init pmac_add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + struct resource rsrc; + char *disp_name; + const int *bus_range; + int primary = 1; + + DBG("Adding PCI host bridge %pOF\n", dev); + + /* Fetch host bridge registers address */ + of_address_to_resource(dev, 0, &rsrc); + + /* Get bus range if any */ + bus_range = of_get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %pOF, assume" + " bus 0\n", dev); + } + + hose = pcibios_alloc_controller(dev); + if (!hose) + return -ENOMEM; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + hose->controller_ops = pmac_pci_controller_ops; + + disp_name = NULL; + + /* 64 bits only bridges */ +#ifdef CONFIG_PPC64 + if (of_device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose); + disp_name = "U3-AGP"; + primary = 0; + } else if (of_device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose); + disp_name = "U3-HT"; + primary = 1; + } else if (of_device_is_compatible(dev, "u4-pcie")) { + setup_u4_pcie(hose); + disp_name = "U4-PCIE"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number:" + " %d->%d\n", disp_name, hose->first_busno, hose->last_busno); +#endif /* CONFIG_PPC64 */ + + /* 32 bits only bridges */ +#ifdef CONFIG_PPC32 + if (of_device_is_compatible(dev, "uni-north")) { + primary = setup_uninorth(hose, &rsrc); + disp_name = "UniNorth"; + } else if (of_node_name_eq(dev, "pci")) { + /* XXX assume this is a mpc106 (grackle) */ + setup_grackle(hose); + disp_name = "Grackle (MPC106)"; + } else if (of_node_name_eq(dev, "bandit")) { + setup_bandit(hose, &rsrc); + disp_name = "Bandit"; + } else if (of_node_name_eq(dev, "chaos")) { + setup_chaos(hose, &rsrc); + disp_name = "Chaos"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge at 0x%016llx. " + "Firmware bus number: %d->%d\n", + disp_name, (unsigned long long)rsrc.start, hose->first_busno, + hose->last_busno); +#endif /* CONFIG_PPC32 */ + + DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", + hose, hose->cfg_addr, hose->cfg_data); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + /* create pci_dn's for DT nodes under this PHB */ + if (IS_ENABLED(CONFIG_PPC64)) + pci_devs_phb_init_dynamic(hose); + + return 0; +} + +void pmac_pci_irq_fixup(struct pci_dev *dev) +{ +#ifdef CONFIG_PPC32 + /* Fixup interrupt for the modem/ethernet combo controller. + * on machines with a second ohare chip. + * The number in the device tree (27) is bogus (correct for + * the ethernet-only board but not the combo ethernet/modem + * board). The real interrupt is 28 on the second controller + * -> 28+32 = 60. + */ + if (has_second_ohare && + dev->vendor == PCI_VENDOR_ID_DEC && + dev->device == PCI_DEVICE_ID_DEC_TULIP_PLUS) { + dev->irq = irq_create_mapping(NULL, 60); + irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); + } +#endif /* CONFIG_PPC32 */ +} + +#ifdef CONFIG_PPC64 +static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_controller *hose = pci_bus_to_host(bridge->bus); + struct device_node *np, *child; + + if (hose != u3_agp) + return 0; + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions for now. We should do something better in the + * future though + */ + np = hose->dn; + PCI_DN(np)->busno = 0xf0; + for_each_child_of_node(np, child) + PCI_DN(child)->busno = 0xf0; + + return 0; +} +#endif /* CONFIG_PPC64 */ + +void __init pmac_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht __maybe_unused = NULL; + + pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN); + + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_pci_init: can't find root " + "of device tree\n"); + return; + } + for_each_child_of_node(root, np) { + if (of_node_name_eq(np, "bandit") + || of_node_name_eq(np, "chaos") + || of_node_name_eq(np, "pci")) { + if (pmac_add_bridge(np) == 0) + of_node_get(np); + } + if (of_node_name_eq(np, "ht")) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + +#ifdef CONFIG_PPC64 + /* Probe HT last as it relies on the agp resources to be already + * setup + */ + if (ht && pmac_add_bridge(ht) != 0) + of_node_put(ht); + + ppc_md.pcibios_root_bridge_prepare = pmac_pci_root_bridge_prepare; + /* pmac_check_ht_link(); */ + +#else /* CONFIG_PPC64 */ + init_p2pbridge(); + init_second_ohare(); + fixup_nec_usb2(); + + /* We are still having some issues with the Xserve G4, enabling + * some offset between bus number and domains for now when we + * assign all busses should help for now + */ + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) + pcibios_assign_bus_offset = 0x10; +#endif +} + +#ifdef CONFIG_PPC32 +static bool pmac_pci_enable_device_hook(struct pci_dev *dev) +{ + struct device_node* node; + int updatecfg = 0; + int uninorth_child; + + node = pci_device_to_OF_node(dev); + + /* We don't want to enable USB controllers absent from the OF tree + * (iBook second controller) + */ + if (dev->vendor == PCI_VENDOR_ID_APPLE + && dev->class == PCI_CLASS_SERIAL_USB_OHCI + && !node) { + printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n", + pci_name(dev)); + return false; + } + + if (!node) + return true; + + uninorth_child = node->parent && + of_device_is_compatible(node->parent, "uni-north"); + + /* Firewire & GMAC were disabled after PCI probe, the driver is + * claiming them, we must re-enable them now. + */ + if (uninorth_child && of_node_name_eq(node, "firewire") && + (of_device_is_compatible(node, "pci106b,18") || + of_device_is_compatible(node, "pci106b,30") || + of_device_is_compatible(node, "pci11c1,5811"))) { + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, node, 0, 1); + pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1); + updatecfg = 1; + } + if (uninorth_child && of_node_name_eq(node, "ethernet") && + of_device_is_compatible(node, "gmac")) { + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1); + updatecfg = 1; + } + + /* + * Fixup various header fields on 32 bits. We don't do that on + * 64 bits as some of these have strange values behind the HT + * bridge and we must not, for example, enable MWI or set the + * cache line size on them. + */ + if (updatecfg) { + u16 cmd; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER + | PCI_COMMAND_INVALIDATE; + pci_write_config_word(dev, PCI_COMMAND, cmd); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16); + + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, + L1_CACHE_BYTES >> 2); + } + + return true; +} + +static void pmac_pci_fixup_ohci(struct pci_dev *dev) +{ + struct device_node *node = pci_device_to_OF_node(dev); + + /* We don't want to assign resources to USB controllers + * absent from the OF tree (iBook second controller) + */ + if (dev->class == PCI_CLASS_SERIAL_USB_OHCI && !node) + dev->resource[0].flags = 0; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_ANY_ID, pmac_pci_fixup_ohci); + +/* We power down some devices after they have been probed. They'll + * be powered back on later on + */ +void __init pmac_pcibios_after_init(void) +{ + struct device_node* nd; + + for_each_node_by_name(nd, "firewire") { + if (nd->parent && (of_device_is_compatible(nd, "pci106b,18") || + of_device_is_compatible(nd, "pci106b,30") || + of_device_is_compatible(nd, "pci11c1,5811")) + && of_device_is_compatible(nd->parent, "uni-north")) { + pmac_call_feature(PMAC_FTR_1394_ENABLE, nd, 0, 0); + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, nd, 0, 0); + } + } + for_each_node_by_name(nd, "ethernet") { + if (nd->parent && of_device_is_compatible(nd, "gmac") + && of_device_is_compatible(nd->parent, "uni-north")) + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, nd, 0, 0); + } +} + +static void pmac_pci_fixup_cardbus(struct pci_dev *dev) +{ + if (!machine_is(powermac)) + return; + /* + * Fix the interrupt routing on the various cardbus bridges + * used on powerbooks + */ + if (dev->vendor != PCI_VENDOR_ID_TI) + return; + if (dev->device == PCI_DEVICE_ID_TI_1130 || + dev->device == PCI_DEVICE_ID_TI_1131) { + u8 val; + /* Enable PCI interrupt */ + if (pci_read_config_byte(dev, 0x91, &val) == 0) + pci_write_config_byte(dev, 0x91, val | 0x30); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } + if (dev->device == PCI_DEVICE_ID_TI_1210 || + dev->device == PCI_DEVICE_ID_TI_1211 || + dev->device == PCI_DEVICE_ID_TI_1410 || + dev->device == PCI_DEVICE_ID_TI_1510) { + u8 val; + /* 0x8c == TI122X_IRQMUX, 2 says to route the INTA + signal out the MFUNC0 pin */ + if (pci_read_config_byte(dev, 0x8c, &val) == 0) + pci_write_config_byte(dev, 0x8c, (val & ~0x0f) | 2); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); + +static void pmac_pci_fixup_pciata(struct pci_dev *dev) +{ + u8 progif = 0; + + /* + * On PowerMacs, we try to switch any PCI ATA controller to + * fully native mode + */ + if (!machine_is(powermac)) + return; + + /* Some controllers don't have the class IDE */ + if (dev->vendor == PCI_VENDOR_ID_PROMISE) + switch(dev->device) { + case PCI_DEVICE_ID_PROMISE_20246: + case PCI_DEVICE_ID_PROMISE_20262: + case PCI_DEVICE_ID_PROMISE_20263: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20270: + case PCI_DEVICE_ID_PROMISE_20271: + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20276: + case PCI_DEVICE_ID_PROMISE_20277: + goto good; + } + /* Others, check PCI class */ + if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) + return; + good: + pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); + if ((progif & 5) != 5) { + printk(KERN_INFO "PCI: %s Forcing PCI IDE into native mode\n", + pci_name(dev)); + (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5); + if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || + (progif & 5) != 5) + printk(KERN_ERR "Rewrite of PROGIF failed !\n"); + else { + /* Clear IO BARs, they will be reassigned */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_2, 0); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_3, 0); + } + } +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata); +#endif /* CONFIG_PPC32 */ + +/* + * Disable second function on K2-SATA, it's broken + * and disable IO BARs on first one + */ +static void fixup_k2_sata(struct pci_dev* dev) +{ + int i; + u16 cmd; + + if (PCI_FUNC(dev->devfn) > 0) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 6; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, + 0); + } + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 5; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, + 0); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata); + +/* + * On U4 (aka CPC945) the PCIe root complex "P2P" bridge resource ranges aren't + * configured by the firmware. The bridge itself seems to ignore them but it + * causes problems with Linux which then re-assigns devices below the bridge, + * thus changing addresses of those devices from what was in the device-tree, + * which sucks when those are video cards using offb + * + * We could just mark it transparent but I prefer fixing up the resources to + * properly show what's going on here, as I have some doubts about having them + * badly configured potentially being an issue for DMA. + * + * We leave PIO alone, it seems to be fine + * + * Oh and there's another funny bug. The OF properties advertize the region + * 0xf1000000..0xf1ffffff as being forwarded as memory space. But that's + * actually not true, this region is the memory mapped config space. So we + * also need to filter it out or we'll map things in the wrong place. + */ +static void fixup_u4_pcie(struct pci_dev* dev) +{ + struct pci_controller *host = pci_bus_to_host(dev->bus); + struct resource *region = NULL; + u32 reg; + int i; + + /* Only do that on PowerMac */ + if (!machine_is(powermac)) + return; + + /* Find the largest MMIO region */ + for (i = 0; i < 3; i++) { + struct resource *r = &host->mem_resources[i]; + if (!(r->flags & IORESOURCE_MEM)) + continue; + /* Skip the 0xf0xxxxxx..f2xxxxxx regions, we know they + * are reserved by HW for other things + */ + if (r->start >= 0xf0000000 && r->start < 0xf3000000) + continue; + if (!region || resource_size(r) > resource_size(region)) + region = r; + } + /* Nothing found, bail */ + if (!region) + return; + + /* Print things out */ + printk(KERN_INFO "PCI: Fixup U4 PCIe bridge range: %pR\n", region); + + /* Fixup bridge config space. We know it's a Mac, resource aren't + * offset so let's just blast them as-is. We also know that they + * fit in 32 bits + */ + reg = ((region->start >> 16) & 0xfff0) | (region->end & 0xfff00000); + pci_write_config_dword(dev, PCI_MEMORY_BASE, reg); + pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0); + pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_U4_PCIE, fixup_u4_pcie); + +#ifdef CONFIG_PPC64 +static int pmac_pci_probe_mode(struct pci_bus *bus) +{ + struct device_node *node = pci_bus_to_OF_node(bus); + + /* We need to use normal PCI probing for the AGP bus, + * since the device for the AGP bridge isn't in the tree. + * Same for the PCIe host on U4 and the HT host bridge. + */ + if (bus->self == NULL && (of_device_is_compatible(node, "u3-agp") || + of_device_is_compatible(node, "u4-pcie") || + of_device_is_compatible(node, "u3-ht"))) + return PCI_PROBE_NORMAL; + return PCI_PROBE_DEVTREE; +} +#endif /* CONFIG_PPC64 */ + +struct pci_controller_ops pmac_pci_controller_ops = { +#ifdef CONFIG_PPC64 + .probe_mode = pmac_pci_probe_mode, +#endif +#ifdef CONFIG_PPC32 + .enable_device_hook = pmac_pci_enable_device_hook, +#endif +}; diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c new file mode 100644 index 0000000000..085e0ad20e --- /dev/null +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static irqreturn_t macio_gpio_irq(int irq, void *data) +{ + pmf_do_irq(data); + + return IRQ_HANDLED; +} + +static int macio_do_gpio_irq_enable(struct pmf_function *func) +{ + unsigned int irq = irq_of_parse_and_map(func->node, 0); + if (!irq) + return -EINVAL; + return request_irq(irq, macio_gpio_irq, 0, func->node->name, func); +} + +static int macio_do_gpio_irq_disable(struct pmf_function *func) +{ + unsigned int irq = irq_of_parse_and_map(func->node, 0); + if (!irq) + return -EINVAL; + free_irq(irq, func); + return 0; +} + +static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask) +{ + u8 __iomem *addr = (u8 __iomem *)func->driver_data; + unsigned long flags; + u8 tmp; + + /* Check polarity */ + if (args && args->count && !args->u[0].v) + value = ~value; + + /* Toggle the GPIO */ + raw_spin_lock_irqsave(&feature_lock, flags); + tmp = readb(addr); + tmp = (tmp & ~mask) | (value & mask); + DBG("Do write 0x%02x to GPIO %pOF (%p)\n", + tmp, func->node, addr); + writeb(tmp, addr); + raw_spin_unlock_irqrestore(&feature_lock, flags); + + return 0; +} + +static int macio_do_gpio_read(PMF_STD_ARGS, u8 mask, int rshift, u8 xor) +{ + u8 __iomem *addr = (u8 __iomem *)func->driver_data; + u32 value; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + value = readb(addr); + *args->u[0].p = ((value & mask) >> rshift) ^ xor; + + return 0; +} + +static int macio_do_delay(PMF_STD_ARGS, u32 duration) +{ + /* assume we can sleep ! */ + msleep((duration + 999) / 1000); + return 0; +} + +static struct pmf_handlers macio_gpio_handlers = { + .irq_enable = macio_do_gpio_irq_enable, + .irq_disable = macio_do_gpio_irq_disable, + .write_gpio = macio_do_gpio_write, + .read_gpio = macio_do_gpio_read, + .delay = macio_do_delay, +}; + +static void __init macio_gpio_init_one(struct macio_chip *macio) +{ + struct device_node *gparent, *gp; + + /* + * Find the "gpio" parent node + */ + + for_each_child_of_node(macio->of_node, gparent) + if (of_node_name_eq(gparent, "gpio")) + break; + if (gparent == NULL) + return; + + DBG("Installing GPIO functions for macio %pOF\n", + macio->of_node); + + /* + * Ok, got one, we dont need anything special to track them down, so + * we just create them all + */ + for_each_child_of_node(gparent, gp) { + const u32 *reg = of_get_property(gp, "reg", NULL); + unsigned long offset; + if (reg == NULL) + continue; + offset = *reg; + /* Deal with old style device-tree. We can safely hard code the + * offset for now too even if it's a bit gross ... + */ + if (offset < 0x50) + offset += 0x50; + offset += (unsigned long)macio->base; + pmf_register_driver(gp, &macio_gpio_handlers, (void *)offset); + } + + DBG("Calling initial GPIO functions for macio %pOF\n", + macio->of_node); + + /* And now we run all the init ones */ + for_each_child_of_node(gparent, gp) + pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL); + + of_node_put(gparent); + + /* Note: We do not at this point implement the "at sleep" or "at wake" + * functions. I yet to find any for GPIOs anyway + */ +} + +static int macio_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + + raw_spin_lock_irqsave(&feature_lock, flags); + MACIO_OUT32(offset, (MACIO_IN32(offset) & ~mask) | (value & mask)); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static int macio_do_read_reg32(PMF_STD_ARGS, u32 offset) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *args->u[0].p = MACIO_IN32(offset); + return 0; +} + +static int macio_do_write_reg8(PMF_STD_ARGS, u32 offset, u8 value, u8 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + + raw_spin_lock_irqsave(&feature_lock, flags); + MACIO_OUT8(offset, (MACIO_IN8(offset) & ~mask) | (value & mask)); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static int macio_do_read_reg8(PMF_STD_ARGS, u32 offset) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *((u8 *)(args->u[0].p)) = MACIO_IN8(offset); + return 0; +} + +static int macio_do_read_reg32_msrx(PMF_STD_ARGS, u32 offset, u32 mask, + u32 shift, u32 xor) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *args->u[0].p = ((MACIO_IN32(offset) & mask) >> shift) ^ xor; + return 0; +} + +static int macio_do_read_reg8_msrx(PMF_STD_ARGS, u32 offset, u32 mask, + u32 shift, u32 xor) +{ + struct macio_chip *macio = func->driver_data; + + /* Check if we have room for reply */ + if (args == NULL || args->count == 0 || args->u[0].p == NULL) + return -EINVAL; + + *((u8 *)(args->u[0].p)) = ((MACIO_IN8(offset) & mask) >> shift) ^ xor; + return 0; +} + +static int macio_do_write_reg32_slm(PMF_STD_ARGS, u32 offset, u32 shift, + u32 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + u32 tmp, val; + + /* Check args */ + if (args == NULL || args->count == 0) + return -EINVAL; + + raw_spin_lock_irqsave(&feature_lock, flags); + tmp = MACIO_IN32(offset); + val = args->u[0].v << shift; + tmp = (tmp & ~mask) | (val & mask); + MACIO_OUT32(offset, tmp); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static int macio_do_write_reg8_slm(PMF_STD_ARGS, u32 offset, u32 shift, + u32 mask) +{ + struct macio_chip *macio = func->driver_data; + unsigned long flags; + u32 tmp, val; + + /* Check args */ + if (args == NULL || args->count == 0) + return -EINVAL; + + raw_spin_lock_irqsave(&feature_lock, flags); + tmp = MACIO_IN8(offset); + val = args->u[0].v << shift; + tmp = (tmp & ~mask) | (val & mask); + MACIO_OUT8(offset, tmp); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + +static struct pmf_handlers macio_mmio_handlers = { + .write_reg32 = macio_do_write_reg32, + .read_reg32 = macio_do_read_reg32, + .write_reg8 = macio_do_write_reg8, + .read_reg8 = macio_do_read_reg8, + .read_reg32_msrx = macio_do_read_reg32_msrx, + .read_reg8_msrx = macio_do_read_reg8_msrx, + .write_reg32_slm = macio_do_write_reg32_slm, + .write_reg8_slm = macio_do_write_reg8_slm, + .delay = macio_do_delay, +}; + +static void __init macio_mmio_init_one(struct macio_chip *macio) +{ + DBG("Installing MMIO functions for macio %pOF\n", + macio->of_node); + + pmf_register_driver(macio->of_node, &macio_mmio_handlers, macio); +} + +static struct device_node *unin_hwclock; + +static int unin_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&feature_lock, flags); + /* This is fairly bogus in darwin, but it should work for our needs + * implemeted that way: + */ + UN_OUT(offset, (UN_IN(offset) & ~mask) | (value & mask)); + raw_spin_unlock_irqrestore(&feature_lock, flags); + return 0; +} + + +static struct pmf_handlers unin_mmio_handlers = { + .write_reg32 = unin_do_write_reg32, + .delay = macio_do_delay, +}; + +static void __init uninorth_install_pfunc(void) +{ + struct device_node *np; + + DBG("Installing functions for UniN %pOF\n", + uninorth_node); + + /* + * Install handlers for the bridge itself + */ + pmf_register_driver(uninorth_node, &unin_mmio_handlers, NULL); + pmf_do_functions(uninorth_node, NULL, 0, PMF_FLAGS_ON_INIT, NULL); + + + /* + * Install handlers for the hwclock child if any + */ + for (np = NULL; (np = of_get_next_child(uninorth_node, np)) != NULL;) + if (of_node_name_eq(np, "hw-clock")) { + unin_hwclock = np; + break; + } + if (unin_hwclock) { + DBG("Installing functions for UniN clock %pOF\n", + unin_hwclock); + pmf_register_driver(unin_hwclock, &unin_mmio_handlers, NULL); + pmf_do_functions(unin_hwclock, NULL, 0, PMF_FLAGS_ON_INIT, + NULL); + } +} + +/* We export this as the SMP code might init us early */ +int __init pmac_pfunc_base_install(void) +{ + static int pfbase_inited; + int i; + + if (pfbase_inited) + return 0; + pfbase_inited = 1; + + if (!machine_is(powermac)) + return 0; + + DBG("Installing base platform functions...\n"); + + /* + * Locate mac-io chips and install handlers + */ + for (i = 0 ; i < MAX_MACIO_CHIPS; i++) { + if (macio_chips[i].of_node) { + macio_mmio_init_one(&macio_chips[i]); + macio_gpio_init_one(&macio_chips[i]); + } + } + + /* + * Install handlers for northbridge and direct mapped hwclock + * if any. We do not implement the config space access callback + * which is only ever used for functions that we do not call in + * the current driver (enabling/disabling cells in U2, mostly used + * to restore the PCI settings, we do that differently) + */ + if (uninorth_node && uninorth_base) + uninorth_install_pfunc(); + + DBG("All base functions installed\n"); + + return 0; +} +machine_arch_initcall(powermac, pmac_pfunc_base_install); + +#ifdef CONFIG_PM + +/* Those can be called by pmac_feature. Ultimately, I should use a sysdev + * or a device, but for now, that's good enough until I sort out some + * ordering issues. Also, we do not bother with GPIOs, as so far I yet have + * to see a case where a GPIO function has the on-suspend or on-resume bit + */ +void pmac_pfunc_base_suspend(void) +{ + int i; + + for (i = 0 ; i < MAX_MACIO_CHIPS; i++) { + if (macio_chips[i].of_node) + pmf_do_functions(macio_chips[i].of_node, NULL, 0, + PMF_FLAGS_ON_SLEEP, NULL); + } + if (uninorth_node) + pmf_do_functions(uninorth_node, NULL, 0, + PMF_FLAGS_ON_SLEEP, NULL); + if (unin_hwclock) + pmf_do_functions(unin_hwclock, NULL, 0, + PMF_FLAGS_ON_SLEEP, NULL); +} + +void pmac_pfunc_base_resume(void) +{ + int i; + + if (unin_hwclock) + pmf_do_functions(unin_hwclock, NULL, 0, + PMF_FLAGS_ON_WAKE, NULL); + if (uninorth_node) + pmf_do_functions(uninorth_node, NULL, 0, + PMF_FLAGS_ON_WAKE, NULL); + for (i = 0 ; i < MAX_MACIO_CHIPS; i++) { + if (macio_chips[i].of_node) + pmf_do_functions(macio_chips[i].of_node, NULL, 0, + PMF_FLAGS_ON_WAKE, NULL); + } +} + +#endif /* CONFIG_PM */ diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c new file mode 100644 index 0000000000..22741ddfd5 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -0,0 +1,1022 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * + * FIXME: Properly make this race free with refcounting etc... + * + * FIXME: LOCKING !!! + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Debug */ +#define LOG_PARSE(fmt...) +#define LOG_ERROR(fmt...) printk(fmt) +#define LOG_BLOB(t,b,c) + +#undef DEBUG +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +/* Command numbers */ +#define PMF_CMD_LIST 0 +#define PMF_CMD_WRITE_GPIO 1 +#define PMF_CMD_READ_GPIO 2 +#define PMF_CMD_WRITE_REG32 3 +#define PMF_CMD_READ_REG32 4 +#define PMF_CMD_WRITE_REG16 5 +#define PMF_CMD_READ_REG16 6 +#define PMF_CMD_WRITE_REG8 7 +#define PMF_CMD_READ_REG8 8 +#define PMF_CMD_DELAY 9 +#define PMF_CMD_WAIT_REG32 10 +#define PMF_CMD_WAIT_REG16 11 +#define PMF_CMD_WAIT_REG8 12 +#define PMF_CMD_READ_I2C 13 +#define PMF_CMD_WRITE_I2C 14 +#define PMF_CMD_RMW_I2C 15 +#define PMF_CMD_GEN_I2C 16 +#define PMF_CMD_SHIFT_BYTES_RIGHT 17 +#define PMF_CMD_SHIFT_BYTES_LEFT 18 +#define PMF_CMD_READ_CFG 19 +#define PMF_CMD_WRITE_CFG 20 +#define PMF_CMD_RMW_CFG 21 +#define PMF_CMD_READ_I2C_SUBADDR 22 +#define PMF_CMD_WRITE_I2C_SUBADDR 23 +#define PMF_CMD_SET_I2C_MODE 24 +#define PMF_CMD_RMW_I2C_SUBADDR 25 +#define PMF_CMD_READ_REG32_MASK_SHR_XOR 26 +#define PMF_CMD_READ_REG16_MASK_SHR_XOR 27 +#define PMF_CMD_READ_REG8_MASK_SHR_XOR 28 +#define PMF_CMD_WRITE_REG32_SHL_MASK 29 +#define PMF_CMD_WRITE_REG16_SHL_MASK 30 +#define PMF_CMD_WRITE_REG8_SHL_MASK 31 +#define PMF_CMD_MASK_AND_COMPARE 32 +#define PMF_CMD_COUNT 33 + +/* This structure holds the state of the parser while walking through + * a function definition + */ +struct pmf_cmd { + const void *cmdptr; + const void *cmdend; + struct pmf_function *func; + void *instdata; + struct pmf_args *args; + int error; +}; + +#if 0 +/* Debug output */ +static void print_blob(const char *title, const void *blob, int bytes) +{ + printk("%s", title); + while(bytes--) { + printk("%02x ", *((u8 *)blob)); + blob += 1; + } + printk("\n"); +} +#endif + +/* + * Parser helpers + */ + +static u32 pmf_next32(struct pmf_cmd *cmd) +{ + u32 value; + if ((cmd->cmdend - cmd->cmdptr) < 4) { + cmd->error = 1; + return 0; + } + value = *((u32 *)cmd->cmdptr); + cmd->cmdptr += 4; + return value; +} + +static const void* pmf_next_blob(struct pmf_cmd *cmd, int count) +{ + const void *value; + if ((cmd->cmdend - cmd->cmdptr) < count) { + cmd->error = 1; + return NULL; + } + value = cmd->cmdptr; + cmd->cmdptr += count; + return value; +} + +/* + * Individual command parsers + */ + +#define PMF_PARSE_CALL(name, cmd, handlers, p...) \ + do { \ + if (cmd->error) \ + return -ENXIO; \ + if (handlers == NULL) \ + return 0; \ + if (handlers->name) \ + return handlers->name(cmd->func, cmd->instdata, \ + cmd->args, p); \ + return -1; \ + } while(0) \ + + +static int pmf_parser_write_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 value = (u8)pmf_next32(cmd); + u8 mask = (u8)pmf_next32(cmd); + + LOG_PARSE("pmf: write_gpio(value: %02x, mask: %02x)\n", value, mask); + + PMF_PARSE_CALL(write_gpio, cmd, h, value, mask); +} + +static int pmf_parser_read_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 mask = (u8)pmf_next32(cmd); + int rshift = (int)pmf_next32(cmd); + u8 xor = (u8)pmf_next32(cmd); + + LOG_PARSE("pmf: read_gpio(mask: %02x, rshift: %d, xor: %02x)\n", + mask, rshift, xor); + + PMF_PARSE_CALL(read_gpio, cmd, h, mask, rshift, xor); +} + +static int pmf_parser_write_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 value = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg32(offset: %08x, value: %08x, mask: %08x)\n", + offset, value, mask); + + PMF_PARSE_CALL(write_reg32, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg32(offset: %08x)\n", offset); + + PMF_PARSE_CALL(read_reg32, cmd, h, offset); +} + + +static int pmf_parser_write_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u16 value = (u16)pmf_next32(cmd); + u16 mask = (u16)pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg16(offset: %08x, value: %04x, mask: %04x)\n", + offset, value, mask); + + PMF_PARSE_CALL(write_reg16, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg16(offset: %08x)\n", offset); + + PMF_PARSE_CALL(read_reg16, cmd, h, offset); +} + + +static int pmf_parser_write_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u8 value = (u16)pmf_next32(cmd); + u8 mask = (u16)pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg8(offset: %08x, value: %02x, mask: %02x)\n", + offset, value, mask); + + PMF_PARSE_CALL(write_reg8, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg8(offset: %08x)\n", offset); + + PMF_PARSE_CALL(read_reg8, cmd, h, offset); +} + +static int pmf_parser_delay(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 duration = pmf_next32(cmd); + + LOG_PARSE("pmf: delay(duration: %d us)\n", duration); + + PMF_PARSE_CALL(delay, cmd, h, duration); +} + +static int pmf_parser_wait_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 value = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: wait_reg32(offset: %08x, comp_value: %08x,mask: %08x)\n", + offset, value, mask); + + PMF_PARSE_CALL(wait_reg32, cmd, h, offset, value, mask); +} + +static int pmf_parser_wait_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u16 value = (u16)pmf_next32(cmd); + u16 mask = (u16)pmf_next32(cmd); + + LOG_PARSE("pmf: wait_reg16(offset: %08x, comp_value: %04x,mask: %04x)\n", + offset, value, mask); + + PMF_PARSE_CALL(wait_reg16, cmd, h, offset, value, mask); +} + +static int pmf_parser_wait_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u8 value = (u8)pmf_next32(cmd); + u8 mask = (u8)pmf_next32(cmd); + + LOG_PARSE("pmf: wait_reg8(offset: %08x, comp_value: %02x,mask: %02x)\n", + offset, value, mask); + + PMF_PARSE_CALL(wait_reg8, cmd, h, offset, value, mask); +} + +static int pmf_parser_read_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 bytes = pmf_next32(cmd); + + LOG_PARSE("pmf: read_i2c(bytes: %ud)\n", bytes); + + PMF_PARSE_CALL(read_i2c, cmd, h, bytes); +} + +static int pmf_parser_write_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 bytes = pmf_next32(cmd); + const void *blob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: write_i2c(bytes: %ud) ...\n", bytes); + LOG_BLOB("pmf: data: \n", blob, bytes); + + PMF_PARSE_CALL(write_i2c, cmd, h, bytes, blob); +} + + +static int pmf_parser_rmw_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 maskbytes = pmf_next32(cmd); + u32 valuesbytes = pmf_next32(cmd); + u32 totalbytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, maskbytes); + const void *valuesblob = pmf_next_blob(cmd, valuesbytes); + + LOG_PARSE("pmf: rmw_i2c(maskbytes: %ud, valuebytes: %ud, " + "totalbytes: %d) ...\n", + maskbytes, valuesbytes, totalbytes); + LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes); + LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes); + + PMF_PARSE_CALL(rmw_i2c, cmd, h, maskbytes, valuesbytes, totalbytes, + maskblob, valuesblob); +} + +static int pmf_parser_read_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + + LOG_PARSE("pmf: read_cfg(offset: %x, bytes: %ud)\n", offset, bytes); + + PMF_PARSE_CALL(read_cfg, cmd, h, offset, bytes); +} + + +static int pmf_parser_write_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + const void *blob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: write_cfg(offset: %x, bytes: %ud)\n", offset, bytes); + LOG_BLOB("pmf: data: \n", blob, bytes); + + PMF_PARSE_CALL(write_cfg, cmd, h, offset, bytes, blob); +} + +static int pmf_parser_rmw_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 maskbytes = pmf_next32(cmd); + u32 valuesbytes = pmf_next32(cmd); + u32 totalbytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, maskbytes); + const void *valuesblob = pmf_next_blob(cmd, valuesbytes); + + LOG_PARSE("pmf: rmw_cfg(maskbytes: %ud, valuebytes: %ud," + " totalbytes: %d) ...\n", + maskbytes, valuesbytes, totalbytes); + LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes); + LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes); + + PMF_PARSE_CALL(rmw_cfg, cmd, h, offset, maskbytes, valuesbytes, + totalbytes, maskblob, valuesblob); +} + + +static int pmf_parser_read_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 subaddr = (u8)pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + + LOG_PARSE("pmf: read_i2c_sub(subaddr: %x, bytes: %ud)\n", + subaddr, bytes); + + PMF_PARSE_CALL(read_i2c_sub, cmd, h, subaddr, bytes); +} + +static int pmf_parser_write_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 subaddr = (u8)pmf_next32(cmd); + u32 bytes = pmf_next32(cmd); + const void *blob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: write_i2c_sub(subaddr: %x, bytes: %ud) ...\n", + subaddr, bytes); + LOG_BLOB("pmf: data: \n", blob, bytes); + + PMF_PARSE_CALL(write_i2c_sub, cmd, h, subaddr, bytes, blob); +} + +static int pmf_parser_set_i2c_mode(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u32 mode = pmf_next32(cmd); + + LOG_PARSE("pmf: set_i2c_mode(mode: %d)\n", mode); + + PMF_PARSE_CALL(set_i2c_mode, cmd, h, mode); +} + + +static int pmf_parser_rmw_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h) +{ + u8 subaddr = (u8)pmf_next32(cmd); + u32 maskbytes = pmf_next32(cmd); + u32 valuesbytes = pmf_next32(cmd); + u32 totalbytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, maskbytes); + const void *valuesblob = pmf_next_blob(cmd, valuesbytes); + + LOG_PARSE("pmf: rmw_i2c_sub(subaddr: %x, maskbytes: %ud, valuebytes: %ud" + ", totalbytes: %d) ...\n", + subaddr, maskbytes, valuesbytes, totalbytes); + LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes); + LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes); + + PMF_PARSE_CALL(rmw_i2c_sub, cmd, h, subaddr, maskbytes, valuesbytes, + totalbytes, maskblob, valuesblob); +} + +static int pmf_parser_read_reg32_msrx(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 xor = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg32_msrx(offset: %x, mask: %x, shift: %x," + " xor: %x\n", offset, mask, shift, xor); + + PMF_PARSE_CALL(read_reg32_msrx, cmd, h, offset, mask, shift, xor); +} + +static int pmf_parser_read_reg16_msrx(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 xor = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg16_msrx(offset: %x, mask: %x, shift: %x," + " xor: %x\n", offset, mask, shift, xor); + + PMF_PARSE_CALL(read_reg16_msrx, cmd, h, offset, mask, shift, xor); +} +static int pmf_parser_read_reg8_msrx(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 xor = pmf_next32(cmd); + + LOG_PARSE("pmf: read_reg8_msrx(offset: %x, mask: %x, shift: %x," + " xor: %x\n", offset, mask, shift, xor); + + PMF_PARSE_CALL(read_reg8_msrx, cmd, h, offset, mask, shift, xor); +} + +static int pmf_parser_write_reg32_slm(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg32_slm(offset: %x, shift: %x, mask: %x\n", + offset, shift, mask); + + PMF_PARSE_CALL(write_reg32_slm, cmd, h, offset, shift, mask); +} + +static int pmf_parser_write_reg16_slm(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg16_slm(offset: %x, shift: %x, mask: %x\n", + offset, shift, mask); + + PMF_PARSE_CALL(write_reg16_slm, cmd, h, offset, shift, mask); +} + +static int pmf_parser_write_reg8_slm(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 offset = pmf_next32(cmd); + u32 shift = pmf_next32(cmd); + u32 mask = pmf_next32(cmd); + + LOG_PARSE("pmf: write_reg8_slm(offset: %x, shift: %x, mask: %x\n", + offset, shift, mask); + + PMF_PARSE_CALL(write_reg8_slm, cmd, h, offset, shift, mask); +} + +static int pmf_parser_mask_and_compare(struct pmf_cmd *cmd, + struct pmf_handlers *h) +{ + u32 bytes = pmf_next32(cmd); + const void *maskblob = pmf_next_blob(cmd, bytes); + const void *valuesblob = pmf_next_blob(cmd, bytes); + + LOG_PARSE("pmf: mask_and_compare(length: %ud ...\n", bytes); + LOG_BLOB("pmf: mask data: \n", maskblob, bytes); + LOG_BLOB("pmf: values data: \n", valuesblob, bytes); + + PMF_PARSE_CALL(mask_and_compare, cmd, h, + bytes, maskblob, valuesblob); +} + + +typedef int (*pmf_cmd_parser_t)(struct pmf_cmd *cmd, struct pmf_handlers *h); + +static pmf_cmd_parser_t pmf_parsers[PMF_CMD_COUNT] = +{ + NULL, + pmf_parser_write_gpio, + pmf_parser_read_gpio, + pmf_parser_write_reg32, + pmf_parser_read_reg32, + pmf_parser_write_reg16, + pmf_parser_read_reg16, + pmf_parser_write_reg8, + pmf_parser_read_reg8, + pmf_parser_delay, + pmf_parser_wait_reg32, + pmf_parser_wait_reg16, + pmf_parser_wait_reg8, + pmf_parser_read_i2c, + pmf_parser_write_i2c, + pmf_parser_rmw_i2c, + NULL, /* Bogus command */ + NULL, /* Shift bytes right: NYI */ + NULL, /* Shift bytes left: NYI */ + pmf_parser_read_cfg, + pmf_parser_write_cfg, + pmf_parser_rmw_cfg, + pmf_parser_read_i2c_sub, + pmf_parser_write_i2c_sub, + pmf_parser_set_i2c_mode, + pmf_parser_rmw_i2c_sub, + pmf_parser_read_reg32_msrx, + pmf_parser_read_reg16_msrx, + pmf_parser_read_reg8_msrx, + pmf_parser_write_reg32_slm, + pmf_parser_write_reg16_slm, + pmf_parser_write_reg8_slm, + pmf_parser_mask_and_compare, +}; + +struct pmf_device { + struct list_head link; + struct device_node *node; + struct pmf_handlers *handlers; + struct list_head functions; + struct kref ref; +}; + +static LIST_HEAD(pmf_devices); +static DEFINE_SPINLOCK(pmf_lock); +static DEFINE_MUTEX(pmf_irq_mutex); + +static void pmf_release_device(struct kref *kref) +{ + struct pmf_device *dev = container_of(kref, struct pmf_device, ref); + kfree(dev); +} + +static inline void pmf_put_device(struct pmf_device *dev) +{ + kref_put(&dev->ref, pmf_release_device); +} + +static inline struct pmf_device *pmf_get_device(struct pmf_device *dev) +{ + kref_get(&dev->ref); + return dev; +} + +static inline struct pmf_device *pmf_find_device(struct device_node *np) +{ + struct pmf_device *dev; + + list_for_each_entry(dev, &pmf_devices, link) { + if (dev->node == np) + return pmf_get_device(dev); + } + return NULL; +} + +static int pmf_parse_one(struct pmf_function *func, + struct pmf_handlers *handlers, + void *instdata, struct pmf_args *args) +{ + struct pmf_cmd cmd; + u32 ccode; + int count, rc; + + cmd.cmdptr = func->data; + cmd.cmdend = func->data + func->length; + cmd.func = func; + cmd.instdata = instdata; + cmd.args = args; + cmd.error = 0; + + LOG_PARSE("pmf: func %s, %d bytes, %s...\n", + func->name, func->length, + handlers ? "executing" : "parsing"); + + /* One subcommand to parse for now */ + count = 1; + + while(count-- && cmd.cmdptr < cmd.cmdend) { + /* Get opcode */ + ccode = pmf_next32(&cmd); + /* Check if we are hitting a command list, fetch new count */ + if (ccode == 0) { + count = pmf_next32(&cmd) - 1; + ccode = pmf_next32(&cmd); + } + if (cmd.error) { + LOG_ERROR("pmf: parse error, not enough data\n"); + return -ENXIO; + } + if (ccode >= PMF_CMD_COUNT) { + LOG_ERROR("pmf: command code %d unknown !\n", ccode); + return -ENXIO; + } + if (pmf_parsers[ccode] == NULL) { + LOG_ERROR("pmf: no parser for command %d !\n", ccode); + return -ENXIO; + } + rc = pmf_parsers[ccode](&cmd, handlers); + if (rc != 0) { + LOG_ERROR("pmf: parser for command %d returned" + " error %d\n", ccode, rc); + return rc; + } + } + + /* We are doing an initial parse pass, we need to adjust the size */ + if (handlers == NULL) + func->length = cmd.cmdptr - func->data; + + return 0; +} + +static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata, + const char *name, u32 *data, + unsigned int length) +{ + int count = 0; + struct pmf_function *func = NULL; + + DBG("pmf: Adding functions for platform-do-%s\n", name); + + while (length >= 12) { + /* Allocate a structure */ + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (func == NULL) + goto bail; + kref_init(&func->ref); + INIT_LIST_HEAD(&func->irq_clients); + func->node = dev->node; + func->driver_data = driverdata; + func->name = name; + func->phandle = data[0]; + func->flags = data[1]; + data += 2; + length -= 8; + func->data = data; + func->length = length; + func->dev = dev; + DBG("pmf: idx %d: flags=%08x, phandle=%08x " + " %d bytes remaining, parsing...\n", + count+1, func->flags, func->phandle, length); + if (pmf_parse_one(func, NULL, NULL, NULL)) { + kfree(func); + goto bail; + } + length -= func->length; + data = (u32 *)(((u8 *)data) + func->length); + list_add(&func->link, &dev->functions); + pmf_get_device(dev); + count++; + } + bail: + DBG("pmf: Added %d functions\n", count); + + return count; +} + +static int pmf_add_functions(struct pmf_device *dev, void *driverdata) +{ + struct property *pp; +#define PP_PREFIX "platform-do-" + const int plen = strlen(PP_PREFIX); + int count = 0; + + for_each_property_of_node(dev->node, pp) { + const char *name; + if (strncmp(pp->name, PP_PREFIX, plen) != 0) + continue; + name = pp->name + plen; + if (strlen(name) && pp->length >= 12) + count += pmf_add_function_prop(dev, driverdata, name, + pp->value, pp->length); + } + return count; +} + + +int pmf_register_driver(struct device_node *np, + struct pmf_handlers *handlers, + void *driverdata) +{ + struct pmf_device *dev; + unsigned long flags; + int rc = 0; + + if (handlers == NULL) + return -EINVAL; + + DBG("pmf: registering driver for node %pOF\n", np); + + spin_lock_irqsave(&pmf_lock, flags); + dev = pmf_find_device(np); + spin_unlock_irqrestore(&pmf_lock, flags); + if (dev != NULL) { + DBG("pmf: already there !\n"); + pmf_put_device(dev); + return -EBUSY; + } + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (dev == NULL) { + DBG("pmf: no memory !\n"); + return -ENOMEM; + } + kref_init(&dev->ref); + dev->node = of_node_get(np); + dev->handlers = handlers; + INIT_LIST_HEAD(&dev->functions); + + rc = pmf_add_functions(dev, driverdata); + if (rc == 0) { + DBG("pmf: no functions, disposing.. \n"); + of_node_put(np); + kfree(dev); + return -ENODEV; + } + + spin_lock_irqsave(&pmf_lock, flags); + list_add(&dev->link, &pmf_devices); + spin_unlock_irqrestore(&pmf_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(pmf_register_driver); + +struct pmf_function *pmf_get_function(struct pmf_function *func) +{ + if (!try_module_get(func->dev->handlers->owner)) + return NULL; + kref_get(&func->ref); + return func; +} +EXPORT_SYMBOL_GPL(pmf_get_function); + +static void pmf_release_function(struct kref *kref) +{ + struct pmf_function *func = + container_of(kref, struct pmf_function, ref); + pmf_put_device(func->dev); + kfree(func); +} + +static inline void __pmf_put_function(struct pmf_function *func) +{ + kref_put(&func->ref, pmf_release_function); +} + +void pmf_put_function(struct pmf_function *func) +{ + if (func == NULL) + return; + module_put(func->dev->handlers->owner); + __pmf_put_function(func); +} +EXPORT_SYMBOL_GPL(pmf_put_function); + +void pmf_unregister_driver(struct device_node *np) +{ + struct pmf_device *dev; + unsigned long flags; + + DBG("pmf: unregistering driver for node %pOF\n", np); + + spin_lock_irqsave(&pmf_lock, flags); + dev = pmf_find_device(np); + if (dev == NULL) { + DBG("pmf: not such driver !\n"); + spin_unlock_irqrestore(&pmf_lock, flags); + return; + } + list_del(&dev->link); + + while(!list_empty(&dev->functions)) { + struct pmf_function *func = + list_entry(dev->functions.next, typeof(*func), link); + list_del(&func->link); + __pmf_put_function(func); + } + + pmf_put_device(dev); + spin_unlock_irqrestore(&pmf_lock, flags); +} +EXPORT_SYMBOL_GPL(pmf_unregister_driver); + +static struct pmf_function *__pmf_find_function(struct device_node *target, + const char *name, u32 flags) +{ + struct device_node *actor = of_node_get(target); + struct pmf_device *dev; + struct pmf_function *func, *result = NULL; + char fname[64]; + const u32 *prop; + u32 ph; + + /* + * Look for a "platform-*" function reference. If we can't find + * one, then we fallback to a direct call attempt + */ + snprintf(fname, 63, "platform-%s", name); + prop = of_get_property(target, fname, NULL); + if (prop == NULL) + goto find_it; + ph = *prop; + if (ph == 0) + goto find_it; + + /* + * Ok, now try to find the actor. If we can't find it, we fail, + * there is no point in falling back there + */ + of_node_put(actor); + actor = of_find_node_by_phandle(ph); + if (actor == NULL) + return NULL; + find_it: + dev = pmf_find_device(actor); + if (dev == NULL) { + result = NULL; + goto out; + } + + list_for_each_entry(func, &dev->functions, link) { + if (name && strcmp(name, func->name)) + continue; + if (func->phandle && target->phandle != func->phandle) + continue; + if ((func->flags & flags) == 0) + continue; + result = func; + break; + } + pmf_put_device(dev); +out: + of_node_put(actor); + return result; +} + + +int pmf_register_irq_client(struct device_node *target, + const char *name, + struct pmf_irq_client *client) +{ + struct pmf_function *func; + unsigned long flags; + + spin_lock_irqsave(&pmf_lock, flags); + func = __pmf_find_function(target, name, PMF_FLAGS_INT_GEN); + if (func) + func = pmf_get_function(func); + spin_unlock_irqrestore(&pmf_lock, flags); + if (func == NULL) + return -ENODEV; + + /* guard against manipulations of list */ + mutex_lock(&pmf_irq_mutex); + if (list_empty(&func->irq_clients)) + func->dev->handlers->irq_enable(func); + + /* guard against pmf_do_irq while changing list */ + spin_lock_irqsave(&pmf_lock, flags); + list_add(&client->link, &func->irq_clients); + spin_unlock_irqrestore(&pmf_lock, flags); + + client->func = func; + mutex_unlock(&pmf_irq_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(pmf_register_irq_client); + +void pmf_unregister_irq_client(struct pmf_irq_client *client) +{ + struct pmf_function *func = client->func; + unsigned long flags; + + BUG_ON(func == NULL); + + /* guard against manipulations of list */ + mutex_lock(&pmf_irq_mutex); + client->func = NULL; + + /* guard against pmf_do_irq while changing list */ + spin_lock_irqsave(&pmf_lock, flags); + list_del(&client->link); + spin_unlock_irqrestore(&pmf_lock, flags); + + if (list_empty(&func->irq_clients)) + func->dev->handlers->irq_disable(func); + mutex_unlock(&pmf_irq_mutex); + pmf_put_function(func); +} +EXPORT_SYMBOL_GPL(pmf_unregister_irq_client); + + +void pmf_do_irq(struct pmf_function *func) +{ + unsigned long flags; + struct pmf_irq_client *client; + + /* For now, using a spinlock over the whole function. Can be made + * to drop the lock using 2 lists if necessary + */ + spin_lock_irqsave(&pmf_lock, flags); + list_for_each_entry(client, &func->irq_clients, link) { + if (!try_module_get(client->owner)) + continue; + client->handler(client->data); + module_put(client->owner); + } + spin_unlock_irqrestore(&pmf_lock, flags); +} +EXPORT_SYMBOL_GPL(pmf_do_irq); + + +int pmf_call_one(struct pmf_function *func, struct pmf_args *args) +{ + struct pmf_device *dev = func->dev; + void *instdata = NULL; + int rc = 0; + + DBG(" ** pmf_call_one(%pOF/%s) **\n", dev->node, func->name); + + if (dev->handlers->begin) + instdata = dev->handlers->begin(func, args); + rc = pmf_parse_one(func, dev->handlers, instdata, args); + if (dev->handlers->end) + dev->handlers->end(func, instdata); + + return rc; +} +EXPORT_SYMBOL_GPL(pmf_call_one); + +int pmf_do_functions(struct device_node *np, const char *name, + u32 phandle, u32 fflags, struct pmf_args *args) +{ + struct pmf_device *dev; + struct pmf_function *func, *tmp; + unsigned long flags; + int rc = -ENODEV; + + spin_lock_irqsave(&pmf_lock, flags); + + dev = pmf_find_device(np); + if (dev == NULL) { + spin_unlock_irqrestore(&pmf_lock, flags); + return -ENODEV; + } + list_for_each_entry_safe(func, tmp, &dev->functions, link) { + if (name && strcmp(name, func->name)) + continue; + if (phandle && func->phandle && phandle != func->phandle) + continue; + if ((func->flags & fflags) == 0) + continue; + if (pmf_get_function(func) == NULL) + continue; + spin_unlock_irqrestore(&pmf_lock, flags); + rc = pmf_call_one(func, args); + pmf_put_function(func); + spin_lock_irqsave(&pmf_lock, flags); + } + pmf_put_device(dev); + spin_unlock_irqrestore(&pmf_lock, flags); + + return rc; +} +EXPORT_SYMBOL_GPL(pmf_do_functions); + + +struct pmf_function *pmf_find_function(struct device_node *target, + const char *name) +{ + struct pmf_function *func; + unsigned long flags; + + spin_lock_irqsave(&pmf_lock, flags); + func = __pmf_find_function(target, name, PMF_FLAGS_ON_DEMAND); + if (func) + func = pmf_get_function(func); + spin_unlock_irqrestore(&pmf_lock, flags); + return func; +} +EXPORT_SYMBOL_GPL(pmf_find_function); + +int pmf_call_function(struct device_node *target, const char *name, + struct pmf_args *args) +{ + struct pmf_function *func = pmf_find_function(target, name); + int rc; + + if (func == NULL) + return -ENODEV; + + rc = pmf_call_one(func, args); + pmf_put_function(func); + return rc; +} +EXPORT_SYMBOL_GPL(pmf_call_function); + diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c new file mode 100644 index 0000000000..7135ea1d7d --- /dev/null +++ b/arch/powerpc/platforms/powermac/pic.c @@ -0,0 +1,650 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support for the interrupt controllers found on Power Macintosh, + * currently Apple's "Grand Central" interrupt controller in all + * it's incarnations. OpenPIC support used on newer machines is + * in a separate file + * + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * Copyright (C) 2005 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * IBM, Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#ifdef CONFIG_PPC32 +struct pmac_irq_hw { + unsigned int event; + unsigned int enable; + unsigned int ack; + unsigned int level; +}; + +/* Workaround flags for 32bit powermac machines */ +unsigned int of_irq_workarounds; +struct device_node *of_irq_dflt_pic; + +/* Default addresses */ +static volatile struct pmac_irq_hw __iomem *pmac_irq_hw[4]; + +static int max_irqs; +static int max_real_irqs; + +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); + +/* The max irq number this driver deals with is 128; see max_irqs */ +static DECLARE_BITMAP(ppc_lost_interrupts, 128); +static DECLARE_BITMAP(ppc_cached_irq_mask, 128); +static int pmac_irq_cascade = -1; +static struct irq_domain *pmac_pic_host; + +static void __pmac_retrigger(unsigned int irq_nr) +{ + if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) { + __set_bit(irq_nr, ppc_lost_interrupts); + irq_nr = pmac_irq_cascade; + mb(); + } + if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) { + atomic_inc(&ppc_n_lost_interrupts); + set_dec(1); + } +} + +static void pmac_mask_and_ack_irq(struct irq_data *d) +{ + unsigned int src = irqd_to_hwirq(d); + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + unsigned long flags; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __clear_bit(src, ppc_cached_irq_mask); + if (__test_and_clear_bit(src, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + out_le32(&pmac_irq_hw[i]->ack, bit); + do { + /* make sure ack gets to controller before we enable + interrupts */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void pmac_ack_irq(struct irq_data *d) +{ + unsigned int src = irqd_to_hwirq(d); + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + unsigned long flags; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + if (__test_and_clear_bit(src, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + out_le32(&pmac_irq_hw[i]->ack, bit); + (void)in_le32(&pmac_irq_hw[i]->ack); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + + if ((unsigned)irq_nr >= max_irqs) + return; + + /* enable unmasked interrupts */ + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + + do { + /* make sure mask gets to controller before we + return to user */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + + /* + * Unfortunately, setting the bit in the enable register + * when the device interrupt is already on *doesn't* set + * the bit in the flag register or request another interrupt. + */ + if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level)) + __pmac_retrigger(irq_nr); +} + +/* When an irq gets requested for the first client, if it's an + * edge interrupt, we clear any previous one on the controller + */ +static unsigned int pmac_startup_irq(struct irq_data *d) +{ + unsigned long flags; + unsigned int src = irqd_to_hwirq(d); + unsigned long bit = 1UL << (src & 0x1f); + int i = src >> 5; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + if (!irqd_is_level_type(d)) + out_le32(&pmac_irq_hw[i]->ack, bit); + __set_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + + return 0; +} + +static void pmac_mask_irq(struct irq_data *d) +{ + unsigned long flags; + unsigned int src = irqd_to_hwirq(d); + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __clear_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 1); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void pmac_unmask_irq(struct irq_data *d) +{ + unsigned long flags; + unsigned int src = irqd_to_hwirq(d); + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __set_bit(src, ppc_cached_irq_mask); + __pmac_set_irq_mask(src, 0); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static int pmac_retrigger(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + __pmac_retrigger(irqd_to_hwirq(d)); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + return 1; +} + +static struct irq_chip pmac_pic = { + .name = "PMAC-PIC", + .irq_startup = pmac_startup_irq, + .irq_mask = pmac_mask_irq, + .irq_ack = pmac_ack_irq, + .irq_mask_ack = pmac_mask_and_ack_irq, + .irq_unmask = pmac_unmask_irq, + .irq_retrigger = pmac_retrigger, +}; + +static irqreturn_t gatwick_action(int cpl, void *dev_id) +{ + unsigned long flags; + int irq, bits; + int rc = IRQ_NONE; + + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + bits |= in_le32(&pmac_irq_hw[i]->level); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + generic_handle_irq(irq); + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + rc = IRQ_HANDLED; + } + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + return rc; +} + +static unsigned int pmac_pic_get_irq(void) +{ + int irq; + unsigned long bits = 0; + unsigned long flags; + +#ifdef CONFIG_PPC_PMAC32_PSURGE + /* IPI's are a hack on the powersurge -- Cort */ + if (smp_processor_id() != 0) { + return psurge_secondary_virq; + } +#endif /* CONFIG_PPC_PMAC32_PSURGE */ + raw_spin_lock_irqsave(&pmac_pic_lock, flags); + for (irq = max_real_irqs; (irq -= 32) >= 0; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + bits |= in_le32(&pmac_irq_hw[i]->level); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + break; + } + raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); + if (unlikely(irq < 0)) + return 0; + return irq_linear_revmap(pmac_pic_host, irq); +} + +static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + /* We match all, we don't always have a node anyway */ + return 1; +} + +static int pmac_pic_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + if (hw >= max_irqs) + return -EINVAL; + + /* Mark level interrupts, set delayed disable for edge ones and set + * handlers + */ + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_chip_and_handler(virq, &pmac_pic, handle_level_irq); + return 0; +} + +static const struct irq_domain_ops pmac_pic_host_ops = { + .match = pmac_pic_host_match, + .map = pmac_pic_host_map, + .xlate = irq_domain_xlate_onecell, +}; + +static void __init pmac_pic_probe_oldstyle(void) +{ + int i; + struct device_node *master = NULL; + struct device_node *slave = NULL; + u8 __iomem *addr; + struct resource r; + + /* Set our get_irq function */ + ppc_md.get_irq = pmac_pic_get_irq; + + /* + * Find the interrupt controller type & node + */ + + if ((master = of_find_node_by_name(NULL, "gc")) != NULL) { + max_irqs = max_real_irqs = 32; + } else if ((master = of_find_node_by_name(NULL, "ohare")) != NULL) { + max_irqs = max_real_irqs = 32; + /* We might have a second cascaded ohare */ + slave = of_find_node_by_name(NULL, "pci106b,7"); + if (slave) + max_irqs = 64; + } else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) { + max_irqs = max_real_irqs = 64; + + /* We might have a second cascaded heathrow */ + + /* Compensate for of_node_put() in of_find_node_by_name() */ + of_node_get(master); + slave = of_find_node_by_name(master, "mac-io"); + + /* Check ordering of master & slave */ + if (of_device_is_compatible(master, "gatwick")) { + BUG_ON(slave == NULL); + swap(master, slave); + } + + /* We found a slave */ + if (slave) + max_irqs = 128; + } + BUG_ON(master == NULL); + + /* + * Allocate an irq host + */ + pmac_pic_host = irq_domain_add_linear(master, max_irqs, + &pmac_pic_host_ops, NULL); + BUG_ON(pmac_pic_host == NULL); + irq_set_default_host(pmac_pic_host); + + /* Get addresses of first controller if we have a node for it */ + BUG_ON(of_address_to_resource(master, 0, &r)); + + /* Map interrupts of primary controller */ + addr = (u8 __iomem *) ioremap(r.start, 0x40); + i = 0; + pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) + (addr + 0x20); + if (max_real_irqs > 32) + pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) + (addr + 0x10); + of_node_put(master); + + printk(KERN_INFO "irq: Found primary Apple PIC %pOF for %d irqs\n", + master, max_real_irqs); + + /* Map interrupts of cascaded controller */ + if (slave && !of_address_to_resource(slave, 0, &r)) { + addr = (u8 __iomem *)ioremap(r.start, 0x40); + pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *) + (addr + 0x20); + if (max_irqs > 64) + pmac_irq_hw[i++] = + (volatile struct pmac_irq_hw __iomem *) + (addr + 0x10); + pmac_irq_cascade = irq_of_parse_and_map(slave, 0); + + printk(KERN_INFO "irq: Found slave Apple PIC %pOF for %d irqs" + " cascade: %d\n", slave, + max_irqs - max_real_irqs, pmac_irq_cascade); + } + of_node_put(slave); + + /* Disable all interrupts in all controllers */ + for (i = 0; i * 32 < max_irqs; ++i) + out_le32(&pmac_irq_hw[i]->enable, 0); + + /* Hookup cascade irq */ + if (slave && pmac_irq_cascade) { + if (request_irq(pmac_irq_cascade, gatwick_action, + IRQF_NO_THREAD, "cascade", NULL)) + pr_err("Failed to register cascade interrupt\n"); + } + + printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); +#ifdef CONFIG_XMON + i = irq_create_mapping(NULL, 20); + if (request_irq(i, xmon_irq, IRQF_NO_THREAD, "NMI - XMON", NULL)) + pr_err("Failed to register NMI-XMON interrupt\n"); +#endif +} + +int of_irq_parse_oldworld(const struct device_node *device, int index, + struct of_phandle_args *out_irq) +{ + const u32 *ints = NULL; + int intlen; + + /* + * Old machines just have a list of interrupt numbers + * and no interrupt-controller nodes. We also have dodgy + * cases where the APPL,interrupts property is completely + * missing behind pci-pci bridges and we have to get it + * from the parent (the bridge itself, as apple just wired + * everything together on these) + */ + while (device) { + ints = of_get_property(device, "AAPL,interrupts", &intlen); + if (ints != NULL) + break; + device = device->parent; + if (!of_node_is_type(device, "pci")) + break; + } + if (ints == NULL) + return -EINVAL; + intlen /= sizeof(u32); + + if (index >= intlen) + return -EINVAL; + + out_irq->np = NULL; + out_irq->args[0] = ints[index]; + out_irq->args_count = 1; + + return 0; +} +#endif /* CONFIG_PPC32 */ + +static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) +{ +#if defined(CONFIG_XMON) && defined(CONFIG_PPC32) + struct device_node* pswitch; + int nmi_irq; + + pswitch = of_find_node_by_name(NULL, "programmer-switch"); + if (pswitch) { + nmi_irq = irq_of_parse_and_map(pswitch, 0); + if (nmi_irq) { + mpic_irq_set_priority(nmi_irq, 9); + if (request_irq(nmi_irq, xmon_irq, IRQF_NO_THREAD, + "NMI - XMON", NULL)) + pr_err("Failed to register NMI-XMON interrupt\n"); + } + of_node_put(pswitch); + } +#endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */ +} + +static struct mpic * __init pmac_setup_one_mpic(struct device_node *np, + int master) +{ + const char *name = master ? " MPIC 1 " : " MPIC 2 "; + struct mpic *mpic; + unsigned int flags = master ? 0 : MPIC_SECONDARY; + + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0); + + if (of_property_read_bool(np, "big-endian")) + flags |= MPIC_BIG_ENDIAN; + + /* Primary Big Endian means HT interrupts. This is quite dodgy + * but works until I find a better way + */ + if (master && (flags & MPIC_BIG_ENDIAN)) + flags |= MPIC_U3_HT_IRQS; + + mpic = mpic_alloc(np, 0, flags, 0, 0, name); + if (mpic == NULL) + return NULL; + + mpic_init(mpic); + + return mpic; + } + +static int __init pmac_pic_probe_mpic(void) +{ + struct mpic *mpic1, *mpic2; + struct device_node *np, *master = NULL, *slave = NULL; + + /* We can have up to 2 MPICs cascaded */ + for_each_node_by_type(np, "open-pic") { + if (master == NULL && !of_property_present(np, "interrupts")) + master = of_node_get(np); + else if (slave == NULL) + slave = of_node_get(np); + if (master && slave) { + of_node_put(np); + break; + } + } + + /* Check for bogus setups */ + if (master == NULL && slave != NULL) { + master = slave; + slave = NULL; + } + + /* Not found, default to good old pmac pic */ + if (master == NULL) + return -ENODEV; + + /* Set master handler */ + ppc_md.get_irq = mpic_get_irq; + + /* Setup master */ + mpic1 = pmac_setup_one_mpic(master, 1); + BUG_ON(mpic1 == NULL); + + /* Install NMI if any */ + pmac_pic_setup_mpic_nmi(mpic1); + + of_node_put(master); + + /* Set up a cascaded controller, if present */ + if (slave) { + mpic2 = pmac_setup_one_mpic(slave, 0); + if (mpic2 == NULL) + printk(KERN_ERR "Failed to setup slave MPIC\n"); + of_node_put(slave); + } + + return 0; +} + + +void __init pmac_pic_init(void) +{ + /* We configure the OF parsing based on our oldworld vs. newworld + * platform type and whether we were booted by BootX. + */ +#ifdef CONFIG_PPC32 + if (!pmac_newworld) + of_irq_workarounds |= OF_IMAP_OLDWORLD_MAC; + if (of_property_read_bool(of_chosen, "linux,bootx")) + of_irq_workarounds |= OF_IMAP_NO_PHANDLE; + + /* If we don't have phandles on a newworld, then try to locate a + * default interrupt controller (happens when booting with BootX). + * We do a first match here, hopefully, that only ever happens on + * machines with one controller. + */ + if (pmac_newworld && (of_irq_workarounds & OF_IMAP_NO_PHANDLE)) { + struct device_node *np; + + for_each_node_with_property(np, "interrupt-controller") { + /* Skip /chosen/interrupt-controller */ + if (of_node_name_eq(np, "chosen")) + continue; + /* It seems like at least one person wants + * to use BootX on a machine with an AppleKiwi + * controller which happens to pretend to be an + * interrupt controller too. */ + if (of_node_name_eq(np, "AppleKiwi")) + continue; + /* I think we found one ! */ + of_irq_dflt_pic = np; + break; + } + } +#endif /* CONFIG_PPC32 */ + + /* We first try to detect Apple's new Core99 chipset, since mac-io + * is quite different on those machines and contains an IBM MPIC2. + */ + if (pmac_pic_probe_mpic() == 0) + return; + +#ifdef CONFIG_PPC32 + pmac_pic_probe_oldstyle(); +#endif +} + +#if defined(CONFIG_PM) && defined(CONFIG_PPC32) +/* + * These procedures are used in implementing sleep on the powerbooks. + * sleep_save_intrs() saves the states of all interrupt enables + * and disables all interrupts except for the nominated one. + * sleep_restore_intrs() restores the states of all interrupt enables. + */ +unsigned long sleep_save_mask[2]; + +/* This used to be passed by the PMU driver but that link got + * broken with the new driver model. We use this tweak for now... + * We really want to do things differently though... + */ +static int pmacpic_find_viaint(void) +{ + int viaint = -1; + +#ifdef CONFIG_ADB_PMU + struct device_node *np; + + if (pmu_get_model() != PMU_OHARE_BASED) + goto not_found; + np = of_find_node_by_name(NULL, "via-pmu"); + if (np == NULL) + goto not_found; + viaint = irq_of_parse_and_map(np, 0); + of_node_put(np); + +not_found: +#endif /* CONFIG_ADB_PMU */ + return viaint; +} + +static int pmacpic_suspend(void) +{ + int viaint = pmacpic_find_viaint(); + + sleep_save_mask[0] = ppc_cached_irq_mask[0]; + sleep_save_mask[1] = ppc_cached_irq_mask[1]; + ppc_cached_irq_mask[0] = 0; + ppc_cached_irq_mask[1] = 0; + if (viaint > 0) + set_bit(viaint, ppc_cached_irq_mask); + out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]); + (void)in_le32(&pmac_irq_hw[0]->event); + /* make sure mask gets to controller before we return to caller */ + mb(); + (void)in_le32(&pmac_irq_hw[0]->enable); + + return 0; +} + +static void pmacpic_resume(void) +{ + int i; + + out_le32(&pmac_irq_hw[0]->enable, 0); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, 0); + mb(); + for (i = 0; i < max_real_irqs; ++i) + if (test_bit(i, sleep_save_mask)) + pmac_unmask_irq(irq_get_irq_data(i)); +} + +static struct syscore_ops pmacpic_syscore_ops = { + .suspend = pmacpic_suspend, + .resume = pmacpic_resume, +}; + +static int __init init_pmacpic_syscore(void) +{ + if (pmac_irq_hw[0]) + register_syscore_ops(&pmacpic_syscore_ops); + return 0; +} + +machine_subsys_initcall(powermac, init_pmacpic_syscore); + +#endif /* CONFIG_PM && CONFIG_PPC32 */ diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h new file mode 100644 index 0000000000..1b696f3526 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PMAC_H__ +#define __PMAC_H__ + +#include +#include + +#include + +/* + * Declaration for the various functions exported by the + * pmac_* files. Mostly for use by pmac_setup + */ + +struct rtc_time; + +extern int pmac_newworld; + +void g5_phy_disable_cpu1(void); + +extern long pmac_time_init(void); +extern time64_t pmac_get_boot_time(void); +extern void pmac_get_rtc_time(struct rtc_time *); +extern int pmac_set_rtc_time(struct rtc_time *); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); +extern void pmac_pci_irq_fixup(struct pci_dev *); +extern void pmac_pci_init(void); + +extern void pmac_nvram_update(void); +extern unsigned char pmac_nvram_read_byte(int addr); +extern void pmac_nvram_write_byte(int addr, unsigned char val); +extern void pmac_pcibios_after_init(void); + +extern void pmac_setup_pci_dma(void); +extern void pmac_check_ht_link(void); + +extern void pmac_setup_smp(void); +extern int psurge_secondary_virq; +extern void low_cpu_offline_self(void) __attribute__((noreturn)); + +extern int pmac_nvram_init(void); +extern void pmac_pic_init(void); + +extern struct pci_controller_ops pmac_pci_controller_ops; + +#endif /* __PMAC_H__ */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c new file mode 100644 index 0000000000..6de1cd5d8a --- /dev/null +++ b/arch/powerpc/platforms/powermac/setup.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Powermac setup and early boot code plus other random bits. + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@samba.org) + * + * Derived from "arch/alpha/kernel/setup.c" + * Copyright (C) 1995 Linus Torvalds + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + */ + +/* + * bootup setup stuff.. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#undef SHOW_GATWICK_IRQS + +static int has_l2cache; + +int pmac_newworld; + +static int current_root_goodness = -1; + +/* sda1 - slightly silly choice */ +#define DEFAULT_ROOT_DEVICE MKDEV(SCSI_DISK0_MAJOR, 1) + +sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; +EXPORT_SYMBOL(sys_ctrler); + +static void pmac_show_cpuinfo(struct seq_file *m) +{ + struct device_node *np; + const char *pp; + int plen; + int mbmodel; + unsigned int mbflags; + char* mbname; + + mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_MODEL, 0); + mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, + PMAC_MB_INFO_FLAGS, 0); + if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, + (long) &mbname) != 0) + mbname = "Unknown"; + + /* find motherboard type */ + seq_printf(m, "machine\t\t: "); + np = of_find_node_by_path("/"); + if (np != NULL) { + pp = of_get_property(np, "model", NULL); + if (pp != NULL) + seq_printf(m, "%s\n", pp); + else + seq_printf(m, "PowerMac\n"); + pp = of_get_property(np, "compatible", &plen); + if (pp != NULL) { + seq_printf(m, "motherboard\t:"); + while (plen > 0) { + int l = strlen(pp) + 1; + seq_printf(m, " %s", pp); + plen -= l; + pp += l; + } + seq_printf(m, "\n"); + } + of_node_put(np); + } else + seq_printf(m, "PowerMac\n"); + + /* print parsed model */ + seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname); + seq_printf(m, "pmac flags\t: %08x\n", mbflags); + + /* find l2 cache info */ + np = of_find_node_by_name(NULL, "l2-cache"); + if (np == NULL) + np = of_find_node_by_type(NULL, "cache"); + if (np != NULL) { + const unsigned int *ic = + of_get_property(np, "i-cache-size", NULL); + const unsigned int *dc = + of_get_property(np, "d-cache-size", NULL); + seq_printf(m, "L2 cache\t:"); + has_l2cache = 1; + if (of_property_read_bool(np, "cache-unified") && dc) { + seq_printf(m, " %dK unified", *dc / 1024); + } else { + if (ic) + seq_printf(m, " %dK instruction", *ic / 1024); + if (dc) + seq_printf(m, "%s %dK data", + (ic? " +": ""), *dc / 1024); + } + pp = of_get_property(np, "ram-type", NULL); + if (pp) + seq_printf(m, " %s", pp); + seq_printf(m, "\n"); + of_node_put(np); + } + + /* Indicate newworld/oldworld */ + seq_printf(m, "pmac-generation\t: %s\n", + pmac_newworld ? "NewWorld" : "OldWorld"); +} + +#ifndef CONFIG_ADB_CUDA +int __init find_via_cuda(void) +{ + struct device_node *dn = of_find_node_by_name(NULL, "via-cuda"); + + if (!dn) + return 0; + of_node_put(dn); + printk("WARNING ! Your machine is CUDA-based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_CUDA option !\n"); + return 0; +} +#endif + +#ifndef CONFIG_ADB_PMU +int __init find_via_pmu(void) +{ + struct device_node *dn = of_find_node_by_name(NULL, "via-pmu"); + + if (!dn) + return 0; + of_node_put(dn); + printk("WARNING ! Your machine is PMU-based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_PMU option !\n"); + return 0; +} +#endif + +#ifndef CONFIG_PMAC_SMU +int __init smu_init(void) +{ + /* should check and warn if SMU is present */ + return 0; +} +#endif + +#ifdef CONFIG_PPC32 +static volatile u32 *sysctrl_regs; + +static void __init ohare_init(void) +{ + struct device_node *dn; + + /* this area has the CPU identification register + and some registers used by smp boards */ + sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000); + + /* + * Turn on the L2 cache. + * We assume that we have a PSX memory controller iff + * we have an ohare I/O controller. + */ + dn = of_find_node_by_name(NULL, "ohare"); + if (dn) { + of_node_put(dn); + if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) { + if (sysctrl_regs[4] & 0x10) + sysctrl_regs[4] |= 0x04000020; + else + sysctrl_regs[4] |= 0x04000000; + if(has_l2cache) + printk(KERN_INFO "Level 2 cache enabled\n"); + } + } +} + +static void __init l2cr_init(void) +{ + /* Checks "l2cr-value" property in the registry */ + if (cpu_has_feature(CPU_FTR_L2CR)) { + struct device_node *np; + + for_each_of_cpu_node(np) { + const unsigned int *l2cr = + of_get_property(np, "l2cr-value", NULL); + if (l2cr) { + _set_L2CR(0); + _set_L2CR(*l2cr); + pr_info("L2CR overridden (0x%x), backside cache is %s\n", + *l2cr, ((*l2cr) & 0x80000000) ? + "enabled" : "disabled"); + } + of_node_put(np); + break; + } + } +} +#endif + +static void __init pmac_setup_arch(void) +{ + struct device_node *cpu, *ic; + const int *fp; + unsigned long pvr; + + pvr = PVR_VER(mfspr(SPRN_PVR)); + + /* Set loops_per_jiffy to a half-way reasonable value, + for use until calibrate_delay gets called. */ + loops_per_jiffy = 50000000 / HZ; + + for_each_of_cpu_node(cpu) { + fp = of_get_property(cpu, "clock-frequency", NULL); + if (fp != NULL) { + if (pvr >= 0x30 && pvr < 0x80) + /* PPC970 etc. */ + loops_per_jiffy = *fp / (3 * HZ); + else if (pvr == 4 || pvr >= 8) + /* 604, G3, G4 etc. */ + loops_per_jiffy = *fp / HZ; + else + /* 603, etc. */ + loops_per_jiffy = *fp / (2 * HZ); + of_node_put(cpu); + break; + } + } + + /* See if newworld or oldworld */ + ic = of_find_node_with_property(NULL, "interrupt-controller"); + if (ic) { + pmac_newworld = 1; + of_node_put(ic); + } + +#ifdef CONFIG_PPC32 + ohare_init(); + l2cr_init(); +#endif /* CONFIG_PPC32 */ + + find_via_cuda(); + find_via_pmu(); + smu_init(); + +#if IS_ENABLED(CONFIG_NVRAM) + pmac_nvram_init(); +#endif +#ifdef CONFIG_PPC32 +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + ROOT_DEV = Root_RAM0; + else +#endif + ROOT_DEV = DEFAULT_ROOT_DEVICE; +#endif + +#ifdef CONFIG_ADB + if (strstr(boot_command_line, "adb_sync")) { + extern int __adb_probe_sync; + __adb_probe_sync = 1; + } +#endif /* CONFIG_ADB */ +} + +static int initializing = 1; + +static int pmac_late_init(void) +{ + initializing = 0; + return 0; +} +machine_late_initcall(powermac, pmac_late_init); + +void note_bootable_part(dev_t dev, int part, int goodness); +/* + * This is __ref because we check for "initializing" before + * touching any of the __init sensitive things and "initializing" + * will be false after __init time. This can't be __init because it + * can be called whenever a disk is first accessed. + */ +void __ref note_bootable_part(dev_t dev, int part, int goodness) +{ + char *p; + + if (!initializing) + return; + if ((goodness <= current_root_goodness) && + ROOT_DEV != DEFAULT_ROOT_DEVICE) + return; + p = strstr(boot_command_line, "root="); + if (p != NULL && (p == boot_command_line || p[-1] == ' ')) + return; + + ROOT_DEV = dev + part; + current_root_goodness = goodness; +} + +#ifdef CONFIG_ADB_CUDA +static void __noreturn cuda_restart(void) +{ + struct adb_request req; + + cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_RESET_SYSTEM); + for (;;) + cuda_poll(); +} + +static void __noreturn cuda_shutdown(void) +{ + struct adb_request req; + + cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_POWERDOWN); + for (;;) + cuda_poll(); +} + +#else +#define cuda_restart() +#define cuda_shutdown() +#endif + +#ifndef CONFIG_ADB_PMU +#define pmu_restart() +#define pmu_shutdown() +#endif + +#ifndef CONFIG_PMAC_SMU +#define smu_restart() +#define smu_shutdown() +#endif + +static void __noreturn pmac_restart(char *cmd) +{ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + cuda_restart(); + break; + case SYS_CTRLER_PMU: + pmu_restart(); + break; + case SYS_CTRLER_SMU: + smu_restart(); + break; + default: ; + } + while (1) ; +} + +static void __noreturn pmac_power_off(void) +{ + switch (sys_ctrler) { + case SYS_CTRLER_CUDA: + cuda_shutdown(); + break; + case SYS_CTRLER_PMU: + pmu_shutdown(); + break; + case SYS_CTRLER_SMU: + smu_shutdown(); + break; + default: ; + } + while (1) ; +} + +static void __noreturn +pmac_halt(void) +{ + pmac_power_off(); +} + +/* + * Early initialization. + */ +static void __init pmac_init(void) +{ + /* Enable early btext debug if requested */ + if (strstr(boot_command_line, "btextdbg")) { + udbg_adb_init_early(); + register_early_udbg_console(); + } + + /* Probe motherboard chipset */ + pmac_feature_init(); + + /* Initialize debug stuff */ + udbg_scc_init(!!strstr(boot_command_line, "sccdbg")); + udbg_adb_init(!!strstr(boot_command_line, "btextdbg")); + +#ifdef CONFIG_PPC64 + iommu_init_early_dart(&pmac_pci_controller_ops); +#endif + + /* SMP Init has to be done early as we need to patch up + * cpu_possible_mask before interrupt stacks are allocated + * or kaboom... + */ +#ifdef CONFIG_SMP + pmac_setup_smp(); +#endif +} + +static int __init pmac_declare_of_platform_devices(void) +{ + struct device_node *np; + + np = of_find_node_by_name(NULL, "valkyrie"); + if (np) { + of_platform_device_create(np, "valkyrie", NULL); + of_node_put(np); + } + np = of_find_node_by_name(NULL, "platinum"); + if (np) { + of_platform_device_create(np, "platinum", NULL); + of_node_put(np); + } + np = of_find_node_by_type(NULL, "smu"); + if (np) { + of_platform_device_create(np, "smu", NULL); + of_node_put(np); + } + np = of_find_node_by_type(NULL, "fcu"); + if (np == NULL) { + /* Some machines have strangely broken device-tree */ + np = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/fan@15e"); + } + if (np) { + of_platform_device_create(np, "temperature", NULL); + of_node_put(np); + } + + return 0; +} +machine_device_initcall(powermac, pmac_declare_of_platform_devices); + +#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE +/* + * This is called very early, as part of console_init() (typically just after + * time_init()). This function is respondible for trying to find a good + * default console on serial ports. It tries to match the open firmware + * default output with one of the available serial console drivers. + */ +static int __init check_pmac_serial_console(void) +{ + struct device_node *prom_stdout = NULL; + int offset = 0; + const char *name; +#ifdef CONFIG_SERIAL_PMACZILOG_TTYS + char *devname = "ttyS"; +#else + char *devname = "ttyPZ"; +#endif + + pr_debug(" -> check_pmac_serial_console()\n"); + + /* The user has requested a console so this is already set up. */ + if (strstr(boot_command_line, "console=")) { + pr_debug(" console was specified !\n"); + return -EBUSY; + } + + if (!of_chosen) { + pr_debug(" of_chosen is NULL !\n"); + return -ENODEV; + } + + /* We are getting a weird phandle from OF ... */ + /* ... So use the full path instead */ + name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) { + pr_debug(" no linux,stdout-path !\n"); + return -ENODEV; + } + prom_stdout = of_find_node_by_path(name); + if (!prom_stdout) { + pr_debug(" can't find stdout package %s !\n", name); + return -ENODEV; + } + pr_debug("stdout is %pOF\n", prom_stdout); + + if (of_node_name_eq(prom_stdout, "ch-a")) + offset = 0; + else if (of_node_name_eq(prom_stdout, "ch-b")) + offset = 1; + else + goto not_found; + of_node_put(prom_stdout); + + pr_debug("Found serial console at %s%d\n", devname, offset); + + return add_preferred_console(devname, offset, NULL); + + not_found: + pr_debug("No preferred console found !\n"); + of_node_put(prom_stdout); + return -ENODEV; +} +console_initcall(check_pmac_serial_console); + +#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */ + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init pmac_probe(void) +{ + if (!of_machine_is_compatible("Power Macintosh") && + !of_machine_is_compatible("MacRISC")) + return 0; + +#ifdef CONFIG_PPC32 + /* isa_io_base gets set in pmac_pci_init */ + DMA_MODE_READ = 1; + DMA_MODE_WRITE = 2; +#endif /* CONFIG_PPC32 */ + + pm_power_off = pmac_power_off; + + pmac_init(); + + return 1; +} + +define_machine(powermac) { + .name = "PowerMac", + .probe = pmac_probe, + .setup_arch = pmac_setup_arch, + .discover_phbs = pmac_pci_init, + .show_cpuinfo = pmac_show_cpuinfo, + .init_IRQ = pmac_pic_init, + .get_irq = NULL, /* changed later */ + .pci_irq_fixup = pmac_pci_irq_fixup, + .restart = pmac_restart, + .halt = pmac_halt, + .time_init = pmac_time_init, + .get_boot_time = pmac_get_boot_time, + .set_rtc_time = pmac_set_rtc_time, + .get_rtc_time = pmac_get_rtc_time, + .calibrate_decr = pmac_calibrate_decr, + .feature_call = pmac_do_feature_call, + .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = power4_idle, + .enable_pmcs = power4_enable_pmcs, +#endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC32 + .pcibios_after_init = pmac_pcibios_after_init, + .phys_mem_access_prot = pci_phys_mem_access_prot, +#endif +}; diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S new file mode 100644 index 0000000000..d497a60003 --- /dev/null +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -0,0 +1,433 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains sleep low-level functions for PowerBook G3. + * Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * and Paul Mackerras (paulus@samba.org). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAGIC 0x4c617273 /* 'Lars' */ + +/* + * Structure for storing CPU registers on the stack. + */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_SDR1 0xc +#define SL_SPRG0 0x10 /* 4 sprg's */ +#define SL_DBAT0 0x20 +#define SL_IBAT0 0x28 +#define SL_DBAT1 0x30 +#define SL_IBAT1 0x38 +#define SL_DBAT2 0x40 +#define SL_IBAT2 0x48 +#define SL_DBAT3 0x50 +#define SL_IBAT3 0x58 +#define SL_DBAT4 0x60 +#define SL_IBAT4 0x68 +#define SL_DBAT5 0x70 +#define SL_IBAT5 0x78 +#define SL_DBAT6 0x80 +#define SL_IBAT6 0x88 +#define SL_DBAT7 0x90 +#define SL_IBAT7 0x98 +#define SL_TB 0xa0 +#define SL_R2 0xa8 +#define SL_CR 0xac +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .text + .align 5 + +#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) || \ + (defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32)) + +/* This gets called by via-pmu.c late during the sleep process. + * The PMU was already send the sleep command and will shut us down + * soon. We need to save all that is needed and setup the wakeup + * vector that will be called by the ROM on wakeup + */ +_GLOBAL(low_sleep_handler) +#ifndef CONFIG_PPC_BOOK3S_32 + blr +#else + mflr r0 + lis r11,sleep_storage@ha + addi r11,r11,sleep_storage@l + stw r0,SL_LR(r11) + mfcr r0 + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) + + /* Save MSR & SDR1 */ + mfmsr r4 + stw r4,SL_MSR(r11) + mfsdr1 r4 + stw r4,SL_SDR1(r11) + + /* Get a stable timebase and save it */ +1: mftbu r4 + stw r4,SL_TB(r11) + mftb r5 + stw r5,SL_TB+4(r11) + mftbu r3 + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r11) + mfsprg r4,1 + stw r4,SL_SPRG0+4(r11) + mfsprg r4,2 + stw r4,SL_SPRG0+8(r11) + mfsprg r4,3 + stw r4,SL_SPRG0+12(r11) + + /* Save BATs */ + mfdbatu r4,0 + stw r4,SL_DBAT0(r11) + mfdbatl r4,0 + stw r4,SL_DBAT0+4(r11) + mfdbatu r4,1 + stw r4,SL_DBAT1(r11) + mfdbatl r4,1 + stw r4,SL_DBAT1+4(r11) + mfdbatu r4,2 + stw r4,SL_DBAT2(r11) + mfdbatl r4,2 + stw r4,SL_DBAT2+4(r11) + mfdbatu r4,3 + stw r4,SL_DBAT3(r11) + mfdbatl r4,3 + stw r4,SL_DBAT3+4(r11) + mfibatu r4,0 + stw r4,SL_IBAT0(r11) + mfibatl r4,0 + stw r4,SL_IBAT0+4(r11) + mfibatu r4,1 + stw r4,SL_IBAT1(r11) + mfibatl r4,1 + stw r4,SL_IBAT1+4(r11) + mfibatu r4,2 + stw r4,SL_IBAT2(r11) + mfibatl r4,2 + stw r4,SL_IBAT2+4(r11) + mfibatu r4,3 + stw r4,SL_IBAT3(r11) + mfibatl r4,3 + stw r4,SL_IBAT3+4(r11) + +BEGIN_MMU_FTR_SECTION + mfspr r4,SPRN_DBAT4U + stw r4,SL_DBAT4(r11) + mfspr r4,SPRN_DBAT4L + stw r4,SL_DBAT4+4(r11) + mfspr r4,SPRN_DBAT5U + stw r4,SL_DBAT5(r11) + mfspr r4,SPRN_DBAT5L + stw r4,SL_DBAT5+4(r11) + mfspr r4,SPRN_DBAT6U + stw r4,SL_DBAT6(r11) + mfspr r4,SPRN_DBAT6L + stw r4,SL_DBAT6+4(r11) + mfspr r4,SPRN_DBAT7U + stw r4,SL_DBAT7(r11) + mfspr r4,SPRN_DBAT7L + stw r4,SL_DBAT7+4(r11) + mfspr r4,SPRN_IBAT4U + stw r4,SL_IBAT4(r11) + mfspr r4,SPRN_IBAT4L + stw r4,SL_IBAT4+4(r11) + mfspr r4,SPRN_IBAT5U + stw r4,SL_IBAT5(r11) + mfspr r4,SPRN_IBAT5L + stw r4,SL_IBAT5+4(r11) + mfspr r4,SPRN_IBAT6U + stw r4,SL_IBAT6(r11) + mfspr r4,SPRN_IBAT6L + stw r4,SL_IBAT6+4(r11) + mfspr r4,SPRN_IBAT7U + stw r4,SL_IBAT7(r11) + mfspr r4,SPRN_IBAT7L + stw r4,SL_IBAT7+4(r11) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + + /* Backup various CPU config stuffs */ + bl __save_cpu_setup + + /* The ROM can wake us up via 2 different vectors: + * - On wallstreet & lombard, we must write a magic + * value 'Lars' at address 4 and a pointer to a + * memory location containing the PC to resume from + * at address 0. + * - On Core99, we must store the wakeup vector at + * address 0x80 and eventually it's parameters + * at address 0x84. I've have some trouble with those + * parameters however and I no longer use them. + */ + lis r5,grackle_wake_up@ha + addi r5,r5,grackle_wake_up@l + tophys(r5,r5) + stw r5,SL_PC(r11) + lis r4,KERNELBASE@h + tophys(r5,r11) + addi r5,r5,SL_PC + lis r6,MAGIC@ha + addi r6,r6,MAGIC@l + stw r5,0(r4) + stw r6,4(r4) + /* Setup stuffs at 0x80-0x84 for Core99 */ + lis r3,core99_wake_up@ha + addi r3,r3,core99_wake_up@l + tophys(r3,r3) + stw r3,0x80(r4) + stw r5,0x84(r4) + + .globl low_cpu_offline_self +low_cpu_offline_self: + /* Flush & disable all caches */ + bl flush_disable_caches + + /* Turn off data relocation. */ + mfmsr r3 /* Save MSR in r7 */ + rlwinm r3,r3,0,28,26 /* Turn off DR bit */ + sync + mtmsr r3 + isync + +BEGIN_FTR_SECTION + /* Flush any pending L2 data prefetches to work around HW bug */ + sync + lis r3,0xfff0 + lwz r0,0(r3) /* perform cache-inhibited load to ROM */ + sync /* (caches are disabled at this point) */ +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + +/* + * Set the HID0 and MSR for sleep. + */ + mfspr r2,SPRN_HID0 + rlwinm r2,r2,0,10,7 /* clear doze, nap */ + oris r2,r2,HID0_SLEEP@h + sync + isync + mtspr SPRN_HID0,r2 + sync + +/* This loop puts us back to sleep in case we have a spurrious + * wakeup so that the host bridge properly stays asleep. The + * CPU will be turned off, either after a known time (about 1 + * second) on wallstreet & lombard, or as soon as the CPU enters + * SLEEP mode on core99 + */ + mfmsr r2 + oris r2,r2,MSR_POW@h +1: sync + mtmsr r2 + isync + b 1b +_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self) +/* + * Here is the resume code. + */ + + +/* + * Core99 machines resume here + * r4 has the physical address of SL_PC(sp) (unused) + */ +_GLOBAL(core99_wake_up) + /* Make sure HID0 no longer contains any sleep bit and that data cache + * is disabled + */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,11,7 /* clear SLEEP, NAP, DOZE bits */ + rlwinm 3,r3,0,18,15 /* clear DCE, ICE */ + mtspr SPRN_HID0,r3 + sync + isync + + /* sanitize MSR */ + mfmsr r3 + ori r3,r3,MSR_EE|MSR_IP + xori r3,r3,MSR_EE|MSR_IP + sync + isync + mtmsr r3 + sync + isync + + /* Recover sleep storage */ + lis r3,sleep_storage@ha + addi r3,r3,sleep_storage@l + tophys(r3,r3) + addi r1,r3,SL_PC + + /* Pass thru to older resume code ... */ +_ASM_NOKPROBE_SYMBOL(core99_wake_up) +/* + * Here is the resume code for older machines. + * r1 has the physical address of SL_PC(sp). + */ + +grackle_wake_up: + + /* Restore the kernel's segment registers before + * we do any r1 memory access as we are not sure they + * are in a sane state above the first 256Mb region + */ + bl load_segment_registers + sync + isync + + subi r1,r1,SL_PC + + /* Restore various CPU config stuffs */ + bl __restore_cpu_setup + + /* Make sure all FPRs have been initialized */ + bl reloc_offset + bl __init_fpu_registers + + /* Invalidate & enable L1 cache, we don't care about + * whatever the ROM may have tried to write to memory + */ + bl __inval_enable_L1 + + /* Restore the BATs, and SDR1. Then we can turn on the MMU. */ + lwz r4,SL_SDR1(r1) + mtsdr1 r4 + lwz r4,SL_SPRG0(r1) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r1) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r1) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r1) + mtsprg 3,r4 + + lwz r4,SL_DBAT0(r1) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r1) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r1) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r1) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r1) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r1) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r1) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r1) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r1) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r1) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r1) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r1) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r1) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r1) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r1) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r1) + mtibatl 3,r4 + +BEGIN_MMU_FTR_SECTION + lwz r4,SL_DBAT4(r1) + mtspr SPRN_DBAT4U,r4 + lwz r4,SL_DBAT4+4(r1) + mtspr SPRN_DBAT4L,r4 + lwz r4,SL_DBAT5(r1) + mtspr SPRN_DBAT5U,r4 + lwz r4,SL_DBAT5+4(r1) + mtspr SPRN_DBAT5L,r4 + lwz r4,SL_DBAT6(r1) + mtspr SPRN_DBAT6U,r4 + lwz r4,SL_DBAT6+4(r1) + mtspr SPRN_DBAT6L,r4 + lwz r4,SL_DBAT7(r1) + mtspr SPRN_DBAT7U,r4 + lwz r4,SL_DBAT7+4(r1) + mtspr SPRN_DBAT7L,r4 + lwz r4,SL_IBAT4(r1) + mtspr SPRN_IBAT4U,r4 + lwz r4,SL_IBAT4+4(r1) + mtspr SPRN_IBAT4L,r4 + lwz r4,SL_IBAT5(r1) + mtspr SPRN_IBAT5U,r4 + lwz r4,SL_IBAT5+4(r1) + mtspr SPRN_IBAT5L,r4 + lwz r4,SL_IBAT6(r1) + mtspr SPRN_IBAT6U,r4 + lwz r4,SL_IBAT6+4(r1) + mtspr SPRN_IBAT6L,r4 + lwz r4,SL_IBAT7(r1) + mtspr SPRN_IBAT7U,r4 + lwz r4,SL_IBAT7+4(r1) + mtspr SPRN_IBAT7L,r4 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + blt 1b + sync + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r1) + lwz r4,SL_TB+4(r1) + mttbu r3 + mttbl r4 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r1) + mtcr r0 + lwz r2,SL_R2(r1) + lmw r12,SL_R12(r1) + + /* restore the MSR and SP and turn on the MMU and return */ + lwz r3,SL_MSR(r1) + lwz r4,SL_LR(r1) + lwz r1,SL_SP(r1) + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi +_ASM_NOKPROBE_SYMBOL(grackle_wake_up) + +#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ + + .section .bss + .balign L1_CACHE_BYTES +sleep_storage: + .space SL_SIZE + .balign L1_CACHE_BYTES, 0 + +#endif /* CONFIG_PPC_BOOK3S_32 */ + .section .text diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c new file mode 100644 index 0000000000..8be71920e6 --- /dev/null +++ b/arch/powerpc/platforms/powermac/smp.c @@ -0,0 +1,1025 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SMP support for power macintosh. + * + * We support both the old "powersurge" SMP architecture + * and the current Core99 (G4 PowerMac) machines. + * + * Note that we don't support the very first rev. of + * Apple/DayStar 2 CPUs board, the one with the funky + * watchdog. Hopefully, none of these should be there except + * maybe internally to Apple. I should probably still add some + * code to detect this card though and disable SMP. --BenH. + * + * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net) + * and Ben Herrenschmidt . + * + * Support for DayStar quad CPU cards + * Copyright (C) XLR8, Inc. 1994-2000 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +extern void __secondary_start_pmac_0(void); + +static void (*pmac_tb_freeze)(int freeze); +static u64 timebase; +static int tb_req; + +#ifdef CONFIG_PPC_PMAC32_PSURGE + +/* + * Powersurge (old powermac SMP) support. + */ + +/* Addresses for powersurge registers */ +#define HAMMERHEAD_BASE 0xf8000000 +#define HHEAD_CONFIG 0x90 +#define HHEAD_SEC_INTR 0xc0 + +/* register for interrupting the primary processor on the powersurge */ +/* N.B. this is actually the ethernet ROM! */ +#define PSURGE_PRI_INTR 0xf3019000 + +/* register for storing the start address for the secondary processor */ +/* N.B. this is the PCI config space address register for the 1st bridge */ +#define PSURGE_START 0xf2800000 + +/* Daystar/XLR8 4-CPU card */ +#define PSURGE_QUAD_REG_ADDR 0xf8800000 + +#define PSURGE_QUAD_IRQ_SET 0 +#define PSURGE_QUAD_IRQ_CLR 1 +#define PSURGE_QUAD_IRQ_PRIMARY 2 +#define PSURGE_QUAD_CKSTOP_CTL 3 +#define PSURGE_QUAD_PRIMARY_ARB 4 +#define PSURGE_QUAD_BOARD_ID 6 +#define PSURGE_QUAD_WHICH_CPU 7 +#define PSURGE_QUAD_CKSTOP_RDBK 8 +#define PSURGE_QUAD_RESET_CTL 11 + +#define PSURGE_QUAD_OUT(r, v) (out_8(quad_base + ((r) << 4) + 4, (v))) +#define PSURGE_QUAD_IN(r) (in_8(quad_base + ((r) << 4) + 4) & 0x0f) +#define PSURGE_QUAD_BIS(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v))) +#define PSURGE_QUAD_BIC(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v))) + +/* virtual addresses for the above */ +static volatile u8 __iomem *hhead_base; +static volatile u8 __iomem *quad_base; +static volatile u32 __iomem *psurge_pri_intr; +static volatile u8 __iomem *psurge_sec_intr; +static volatile u32 __iomem *psurge_start; + +/* values for psurge_type */ +#define PSURGE_NONE -1 +#define PSURGE_DUAL 0 +#define PSURGE_QUAD_OKEE 1 +#define PSURGE_QUAD_COTTON 2 +#define PSURGE_QUAD_ICEGRASS 3 + +/* what sort of powersurge board we have */ +static int psurge_type = PSURGE_NONE; + +/* irq for secondary cpus to report */ +static struct irq_domain *psurge_host; +int psurge_secondary_virq; + +/* + * Set and clear IPIs for powersurge. + */ +static inline void psurge_set_ipi(int cpu) +{ + if (psurge_type == PSURGE_NONE) + return; + if (cpu == 0) + in_be32(psurge_pri_intr); + else if (psurge_type == PSURGE_DUAL) + out_8(psurge_sec_intr, 0); + else + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu); +} + +static inline void psurge_clr_ipi(int cpu) +{ + if (cpu > 0) { + switch(psurge_type) { + case PSURGE_DUAL: + out_8(psurge_sec_intr, ~0); + break; + case PSURGE_NONE: + break; + default: + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu); + } + } +} + +/* + * On powersurge (old SMP powermac architecture) we don't have + * separate IPIs for separate messages like openpic does. Instead + * use the generic demux helpers + * -- paulus. + */ +static irqreturn_t psurge_ipi_intr(int irq, void *d) +{ + psurge_clr_ipi(smp_processor_id()); + smp_ipi_demux(); + + return IRQ_HANDLED; +} + +static void smp_psurge_cause_ipi(int cpu) +{ + psurge_set_ipi(cpu); +} + +static int psurge_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq); + + return 0; +} + +static const struct irq_domain_ops psurge_host_ops = { + .map = psurge_host_map, +}; + +static int __init psurge_secondary_ipi_init(void) +{ + int rc = -ENOMEM; + + psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL); + + if (psurge_host) + psurge_secondary_virq = irq_create_direct_mapping(psurge_host); + + if (psurge_secondary_virq) + rc = request_irq(psurge_secondary_virq, psurge_ipi_intr, + IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL); + + if (rc) + pr_err("Failed to setup secondary cpu IPI\n"); + + return rc; +} + +/* + * Determine a quad card presence. We read the board ID register, we + * force the data bus to change to something else, and we read it again. + * It it's stable, then the register probably exist (ugh !) + */ +static int __init psurge_quad_probe(void) +{ + int type; + unsigned int i; + + type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID); + if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS + || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID)) + return PSURGE_DUAL; + + /* looks OK, try a slightly more rigorous test */ + /* bogus is not necessarily cacheline-aligned, + though I don't suppose that really matters. -- paulus */ + for (i = 0; i < 100; i++) { + volatile u32 bogus[8]; + bogus[(0+i)%8] = 0x00000000; + bogus[(1+i)%8] = 0x55555555; + bogus[(2+i)%8] = 0xFFFFFFFF; + bogus[(3+i)%8] = 0xAAAAAAAA; + bogus[(4+i)%8] = 0x33333333; + bogus[(5+i)%8] = 0xCCCCCCCC; + bogus[(6+i)%8] = 0xCCCCCCCC; + bogus[(7+i)%8] = 0x33333333; + wmb(); + asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory"); + mb(); + if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID)) + return PSURGE_DUAL; + } + return type; +} + +static void __init psurge_quad_init(void) +{ + int procbits; + + if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351); + procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU); + if (psurge_type == PSURGE_QUAD_ICEGRASS) + PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits); + else + PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits); + mdelay(33); + out_8(psurge_sec_intr, ~0); + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits); + PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits); + if (psurge_type != PSURGE_QUAD_ICEGRASS) + PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits); + PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits); + mdelay(33); + PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits); + mdelay(33); + PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits); + mdelay(33); +} + +static void __init smp_psurge_probe(void) +{ + int i, ncpus; + struct device_node *dn; + + /* + * The powersurge cpu board can be used in the generation + * of powermacs that have a socket for an upgradeable cpu card, + * including the 7500, 8500, 9500, 9600. + * The device tree doesn't tell you if you have 2 cpus because + * OF doesn't know anything about the 2nd processor. + * Instead we look for magic bits in magic registers, + * in the hammerhead memory controller in the case of the + * dual-cpu powersurge board. -- paulus. + */ + dn = of_find_node_by_name(NULL, "hammerhead"); + if (dn == NULL) + return; + of_node_put(dn); + + hhead_base = ioremap(HAMMERHEAD_BASE, 0x800); + quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024); + psurge_sec_intr = hhead_base + HHEAD_SEC_INTR; + + psurge_type = psurge_quad_probe(); + if (psurge_type != PSURGE_DUAL) { + psurge_quad_init(); + /* All released cards using this HW design have 4 CPUs */ + ncpus = 4; + /* No sure how timebase sync works on those, let's use SW */ + smp_ops->give_timebase = smp_generic_give_timebase; + smp_ops->take_timebase = smp_generic_take_timebase; + } else { + iounmap(quad_base); + if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) { + /* not a dual-cpu card */ + iounmap(hhead_base); + psurge_type = PSURGE_NONE; + return; + } + ncpus = 2; + } + + if (psurge_secondary_ipi_init()) + return; + + psurge_start = ioremap(PSURGE_START, 4); + psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); + + /* This is necessary because OF doesn't know about the + * secondary cpu(s), and thus there aren't nodes in the + * device tree for them, and smp_setup_cpu_maps hasn't + * set their bits in cpu_present_mask. + */ + if (ncpus > NR_CPUS) + ncpus = NR_CPUS; + for (i = 1; i < ncpus ; ++i) + set_cpu_present(i, true); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); +} + +static int __init smp_psurge_kick_cpu(int nr) +{ + unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; + unsigned long a, flags; + int i, j; + + /* Defining this here is evil ... but I prefer hiding that + * crap to avoid giving people ideas that they can do the + * same. + */ + extern volatile unsigned int cpu_callin_map[NR_CPUS]; + + /* may need to flush here if secondary bats aren't setup */ + for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32) + asm volatile("dcbf 0,%0" : : "r" (a) : "memory"); + asm volatile("sync"); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353); + + /* This is going to freeze the timeebase, we disable interrupts */ + local_irq_save(flags); + + out_be32(psurge_start, start); + mb(); + + psurge_set_ipi(nr); + + /* + * We can't use udelay here because the timebase is now frozen. + */ + for (i = 0; i < 2000; ++i) + asm volatile("nop" : : : "memory"); + psurge_clr_ipi(nr); + + /* + * Also, because the timebase is frozen, we must not return to the + * caller which will try to do udelay's etc... Instead, we wait -here- + * for the CPU to callin. + */ + for (i = 0; i < 100000 && !cpu_callin_map[nr]; ++i) { + for (j = 1; j < 10000; j++) + asm volatile("nop" : : : "memory"); + asm volatile("sync" : : : "memory"); + } + if (!cpu_callin_map[nr]) + goto stuck; + + /* And we do the TB sync here too for standard dual CPU cards */ + if (psurge_type == PSURGE_DUAL) { + while(!tb_req) + barrier(); + tb_req = 0; + mb(); + timebase = get_tb(); + mb(); + while (timebase) + barrier(); + mb(); + } + stuck: + /* now interrupt the secondary, restarting both TBs */ + if (psurge_type == PSURGE_DUAL) + psurge_set_ipi(1); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); + + return 0; +} + +static void __init smp_psurge_setup_cpu(int cpu_nr) +{ + unsigned long flags = IRQF_PERCPU | IRQF_NO_THREAD; + int irq; + + if (cpu_nr != 0 || !psurge_start) + return; + + /* reset the entry point so if we get another intr we won't + * try to startup again */ + out_be32(psurge_start, 0x100); + irq = irq_create_mapping(NULL, 30); + if (request_irq(irq, psurge_ipi_intr, flags, "primary IPI", NULL)) + printk(KERN_ERR "Couldn't get primary IPI interrupt"); +} + +void __init smp_psurge_take_timebase(void) +{ + if (psurge_type != PSURGE_DUAL) + return; + + tb_req = 1; + mb(); + while (!timebase) + barrier(); + mb(); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + mb(); + set_dec(tb_ticks_per_jiffy/2); +} + +void __init smp_psurge_give_timebase(void) +{ + /* Nothing to do here */ +} + +/* PowerSurge-style Macs */ +struct smp_ops_t psurge_smp_ops = { + .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ + .cause_ipi = smp_psurge_cause_ipi, + .cause_nmi_ipi = NULL, + .probe = smp_psurge_probe, + .kick_cpu = smp_psurge_kick_cpu, + .setup_cpu = smp_psurge_setup_cpu, + .give_timebase = smp_psurge_give_timebase, + .take_timebase = smp_psurge_take_timebase, +}; +#endif /* CONFIG_PPC_PMAC32_PSURGE */ + +/* + * Core 99 and later support + */ + + +static void smp_core99_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + while(!tb_req) + barrier(); + tb_req = 0; + (*pmac_tb_freeze)(1); + mb(); + timebase = get_tb(); + mb(); + while (timebase) + barrier(); + mb(); + (*pmac_tb_freeze)(0); + mb(); + + local_irq_restore(flags); +} + + +static void smp_core99_take_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + tb_req = 1; + mb(); + while (!timebase) + barrier(); + mb(); + set_tb(timebase >> 32, timebase & 0xffffffff); + timebase = 0; + mb(); + + local_irq_restore(flags); +} + +#ifdef CONFIG_PPC64 +/* + * G5s enable/disable the timebase via an i2c-connected clock chip. + */ +static struct pmac_i2c_bus *pmac_tb_clock_chip_host; +static u8 pmac_tb_pulsar_addr; + +static void smp_core99_cypress_tb_freeze(int freeze) +{ + u8 data; + int rc; + + /* Strangely, the device-tree says address is 0xd2, but darwin + * accesses 0xd0 ... + */ + pmac_i2c_setmode(pmac_tb_clock_chip_host, + pmac_i2c_mode_combined); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + 0xd0 | pmac_i2c_read, + 1, 0x81, &data, 1); + if (rc != 0) + goto bail; + + data = (data & 0xf3) | (freeze ? 0x00 : 0x0c); + + pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + 0xd0 | pmac_i2c_write, + 1, 0x81, &data, 1); + + bail: + if (rc != 0) { + printk("Cypress Timebase %s rc: %d\n", + freeze ? "freeze" : "unfreeze", rc); + panic("Timebase freeze failed !\n"); + } +} + + +static void smp_core99_pulsar_tb_freeze(int freeze) +{ + u8 data; + int rc; + + pmac_i2c_setmode(pmac_tb_clock_chip_host, + pmac_i2c_mode_combined); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + pmac_tb_pulsar_addr | pmac_i2c_read, + 1, 0x2e, &data, 1); + if (rc != 0) + goto bail; + + data = (data & 0x88) | (freeze ? 0x11 : 0x22); + + pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub); + rc = pmac_i2c_xfer(pmac_tb_clock_chip_host, + pmac_tb_pulsar_addr | pmac_i2c_write, + 1, 0x2e, &data, 1); + bail: + if (rc != 0) { + printk(KERN_ERR "Pulsar Timebase %s rc: %d\n", + freeze ? "freeze" : "unfreeze", rc); + panic("Timebase freeze failed !\n"); + } +} + +static void __init smp_core99_setup_i2c_hwsync(int ncpus) +{ + struct device_node *cc = NULL; + struct device_node *p; + const char *name = NULL; + const u32 *reg; + int ok; + + /* Look for the clock chip */ + for_each_node_by_name(cc, "i2c-hwclock") { + p = of_get_parent(cc); + ok = p && of_device_is_compatible(p, "uni-n-i2c"); + of_node_put(p); + if (!ok) + continue; + + pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc); + if (pmac_tb_clock_chip_host == NULL) + continue; + reg = of_get_property(cc, "reg", NULL); + if (reg == NULL) + continue; + switch (*reg) { + case 0xd2: + if (of_device_is_compatible(cc,"pulsar-legacy-slewing")) { + pmac_tb_freeze = smp_core99_pulsar_tb_freeze; + pmac_tb_pulsar_addr = 0xd2; + name = "Pulsar"; + } else if (of_device_is_compatible(cc, "cy28508")) { + pmac_tb_freeze = smp_core99_cypress_tb_freeze; + name = "Cypress"; + } + break; + case 0xd4: + pmac_tb_freeze = smp_core99_pulsar_tb_freeze; + pmac_tb_pulsar_addr = 0xd4; + name = "Pulsar"; + break; + } + if (pmac_tb_freeze != NULL) + break; + } + if (pmac_tb_freeze != NULL) { + /* Open i2c bus for synchronous access */ + if (pmac_i2c_open(pmac_tb_clock_chip_host, 1)) { + printk(KERN_ERR "Failed top open i2c bus for clock" + " sync, fallback to software sync !\n"); + goto no_i2c_sync; + } + printk(KERN_INFO "Processor timebase sync using %s i2c clock\n", + name); + return; + } + no_i2c_sync: + pmac_tb_freeze = NULL; + pmac_tb_clock_chip_host = NULL; +} + + + +/* + * Newer G5s uses a platform function + */ + +static void smp_core99_pfunc_tb_freeze(int freeze) +{ + struct device_node *cpus; + struct pmf_args args; + + cpus = of_find_node_by_path("/cpus"); + BUG_ON(cpus == NULL); + args.count = 1; + args.u[0].v = !freeze; + pmf_call_function(cpus, "cpu-timebase", &args); + of_node_put(cpus); +} + +#else /* CONFIG_PPC64 */ + +/* + * SMP G4 use a GPIO to enable/disable the timebase. + */ + +static unsigned int core99_tb_gpio; /* Timebase freeze GPIO */ + +static void smp_core99_gpio_tb_freeze(int freeze) +{ + if (freeze) + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); + else + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); + pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); +} + + +#endif /* !CONFIG_PPC64 */ + +static void core99_init_caches(int cpu) +{ +#ifndef CONFIG_PPC64 + /* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */ + static long int core99_l2_cache; + static long int core99_l3_cache; + + if (!cpu_has_feature(CPU_FTR_L2CR)) + return; + + if (cpu == 0) { + core99_l2_cache = _get_L2CR(); + printk("CPU0: L2CR is %lx\n", core99_l2_cache); + } else { + printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR()); + _set_L2CR(0); + _set_L2CR(core99_l2_cache); + printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache); + } + + if (!cpu_has_feature(CPU_FTR_L3CR)) + return; + + if (cpu == 0){ + core99_l3_cache = _get_L3CR(); + printk("CPU0: L3CR is %lx\n", core99_l3_cache); + } else { + printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR()); + _set_L3CR(0); + _set_L3CR(core99_l3_cache); + printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache); + } +#endif /* !CONFIG_PPC64 */ +} + +static void __init smp_core99_setup(int ncpus) +{ +#ifdef CONFIG_PPC64 + + /* i2c based HW sync on some G5s */ + if (of_machine_is_compatible("PowerMac7,2") || + of_machine_is_compatible("PowerMac7,3") || + of_machine_is_compatible("RackMac3,1")) + smp_core99_setup_i2c_hwsync(ncpus); + + /* pfunc based HW sync on recent G5s */ + if (pmac_tb_freeze == NULL) { + struct device_node *cpus = + of_find_node_by_path("/cpus"); + if (cpus && + of_property_read_bool(cpus, "platform-cpu-timebase")) { + pmac_tb_freeze = smp_core99_pfunc_tb_freeze; + printk(KERN_INFO "Processor timebase sync using" + " platform function\n"); + } + of_node_put(cpus); + } + +#else /* CONFIG_PPC64 */ + + /* GPIO based HW sync on ppc32 Core99 */ + if (pmac_tb_freeze == NULL && !of_machine_is_compatible("MacRISC4")) { + struct device_node *cpu; + const u32 *tbprop = NULL; + + core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ + cpu = of_find_node_by_type(NULL, "cpu"); + if (cpu != NULL) { + tbprop = of_get_property(cpu, "timebase-enable", NULL); + if (tbprop) + core99_tb_gpio = *tbprop; + of_node_put(cpu); + } + pmac_tb_freeze = smp_core99_gpio_tb_freeze; + printk(KERN_INFO "Processor timebase sync using" + " GPIO 0x%02x\n", core99_tb_gpio); + } + +#endif /* CONFIG_PPC64 */ + + /* No timebase sync, fallback to software */ + if (pmac_tb_freeze == NULL) { + smp_ops->give_timebase = smp_generic_give_timebase; + smp_ops->take_timebase = smp_generic_take_timebase; + printk(KERN_INFO "Processor timebase sync using software\n"); + } + +#ifndef CONFIG_PPC64 + { + int i; + + /* XXX should get this from reg properties */ + for (i = 1; i < ncpus; ++i) + set_hard_smp_processor_id(i, i); + } +#endif + + /* 32 bits SMP can't NAP */ + if (!of_machine_is_compatible("MacRISC4")) + powersave_nap = 0; +} + +static void __init smp_core99_probe(void) +{ + struct device_node *cpus; + int ncpus = 0; + + if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); + + /* Count CPUs in the device-tree */ + for_each_node_by_type(cpus, "cpu") + ++ncpus; + + printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus); + + /* Nothing more to do if less than 2 of them */ + if (ncpus <= 1) + return; + + /* We need to perform some early initialisations before we can start + * setting up SMP as we are running before initcalls + */ + pmac_pfunc_base_install(); + pmac_i2c_init(); + + /* Setup various bits like timebase sync method, ability to nap, ... */ + smp_core99_setup(ncpus); + + /* Install IPIs */ + mpic_request_ipis(); + + /* Collect l2cr and l3cr values from CPU 0 */ + core99_init_caches(0); +} + +static int smp_core99_kick_cpu(int nr) +{ + unsigned int save_vector; + unsigned long target, flags; + unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100); + + if (nr < 0 || nr > 3) + return -ENOENT; + + if (ppc_md.progress) + ppc_md.progress("smp_core99_kick_cpu", 0x346); + + local_irq_save(flags); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector that does + * b __secondary_start_pmac_0 + nr*8 + */ + target = (unsigned long) __secondary_start_pmac_0 + nr * 8; + patch_branch(vector, target, BRANCH_SET_LINK); + + /* Put some life in our friend */ + pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); + + /* FIXME: We wait a bit for the CPU to take the exception, I should + * instead wait for the entry code to set something for me. Well, + * ideally, all that crap will be done in prom.c and the CPU left + * in a RAM-based wait loop like CHRP. + */ + mdelay(1); + + /* Restore our exception vector */ + patch_instruction(vector, ppc_inst(save_vector)); + + local_irq_restore(flags); + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); + + return 0; +} + +static void smp_core99_setup_cpu(int cpu_nr) +{ + /* Setup L2/L3 */ + if (cpu_nr != 0) + core99_init_caches(cpu_nr); + + /* Setup openpic */ + mpic_setup_this_cpu(); +} + +#ifdef CONFIG_PPC64 +#ifdef CONFIG_HOTPLUG_CPU +static unsigned int smp_core99_host_open; + +static int smp_core99_cpu_prepare(unsigned int cpu) +{ + int rc; + + /* Open i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host && !smp_core99_host_open) { + rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); + if (rc) { + pr_err("Failed to open i2c bus for time sync\n"); + return notifier_from_errno(rc); + } + smp_core99_host_open = 1; + } + return 0; +} + +static int smp_core99_cpu_online(unsigned int cpu) +{ + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host && smp_core99_host_open) { + pmac_i2c_close(pmac_tb_clock_chip_host); + smp_core99_host_open = 0; + } + return 0; +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __init smp_core99_bringup_done(void) +{ + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host) + pmac_i2c_close(pmac_tb_clock_chip_host); + + /* If we didn't start the second CPU, we must take + * it off the bus. + */ + if (of_machine_is_compatible("MacRISC4") && + num_online_cpus() < 2) { + set_cpu_present(1, false); + g5_phy_disable_cpu1(); + } +#ifdef CONFIG_HOTPLUG_CPU + cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE, + "powerpc/pmac:prepare", smp_core99_cpu_prepare, + NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online", + smp_core99_cpu_online, NULL); +#endif + + if (ppc_md.progress) + ppc_md.progress("smp_core99_bringup_done", 0x349); +} +#endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_HOTPLUG_CPU + +static int smp_core99_cpu_disable(void) +{ + int rc = generic_cpu_disable(); + if (rc) + return rc; + + mpic_cpu_set_priority(0xf); + + cleanup_cpu_mmu_context(); + + return 0; +} + +#ifdef CONFIG_PPC32 + +static void pmac_cpu_offline_self(void) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + idle_task_exit(); + pr_debug("CPU%d offline\n", cpu); + generic_set_cpu_dead(cpu); + smp_wmb(); + mb(); + low_cpu_offline_self(); +} + +#else /* CONFIG_PPC32 */ + +static void pmac_cpu_offline_self(void) +{ + int cpu = smp_processor_id(); + + local_irq_disable(); + idle_task_exit(); + + /* + * turn off as much as possible, we'll be + * kicked out as this will only be invoked + * on core99 platforms for now ... + */ + + printk(KERN_INFO "CPU#%d offline\n", cpu); + generic_set_cpu_dead(cpu); + smp_wmb(); + + /* + * Re-enable interrupts. The NAP code needs to enable them + * anyways, do it now so we deal with the case where one already + * happened while soft-disabled. + * We shouldn't get any external interrupts, only decrementer, and the + * decrementer handler is safe for use on offline CPUs + */ + local_irq_enable(); + + while (1) { + /* let's not take timer interrupts too often ... */ + set_dec(0x7fffffff); + + /* Enter NAP mode */ + power4_idle(); + } +} + +#endif /* else CONFIG_PPC32 */ +#endif /* CONFIG_HOTPLUG_CPU */ + +/* Core99 Macs (dual G4s and G5s) */ +static struct smp_ops_t core99_smp_ops = { + .message_pass = smp_mpic_message_pass, + .probe = smp_core99_probe, +#ifdef CONFIG_PPC64 + .bringup_done = smp_core99_bringup_done, +#endif + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_core99_give_timebase, + .take_timebase = smp_core99_take_timebase, +#if defined(CONFIG_HOTPLUG_CPU) + .cpu_disable = smp_core99_cpu_disable, + .cpu_die = generic_cpu_die, +#endif +}; + +void __init pmac_setup_smp(void) +{ + struct device_node *np; + + /* Check for Core99 */ + np = of_find_node_by_name(NULL, "uni-n"); + if (!np) + np = of_find_node_by_name(NULL, "u3"); + if (!np) + np = of_find_node_by_name(NULL, "u4"); + if (np) { + of_node_put(np); + smp_ops = &core99_smp_ops; + } +#ifdef CONFIG_PPC_PMAC32_PSURGE + else { + /* We have to set bits in cpu_possible_mask here since the + * secondary CPU(s) aren't in the device tree. Various + * things won't be initialized for CPUs not in the possible + * map, so we really need to fix it up here. + */ + int cpu; + + for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu) + set_cpu_possible(cpu, true); + smp_ops = &psurge_smp_ops; + } +#endif /* CONFIG_PPC_PMAC32_PSURGE */ + +#ifdef CONFIG_HOTPLUG_CPU + smp_ops->cpu_offline_self = pmac_cpu_offline_self; +#endif +} + + diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c new file mode 100644 index 0000000000..8633891b7a --- /dev/null +++ b/arch/powerpc/platforms/powermac/time.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for periodic interrupts (100 per second) and for getting + * the current time from the RTC on Power Macintoshes. + * + * We use the decrementer register for our periodic interrupts. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + * Copyright (C) 2003-2005 Benjamin Herrenschmidt. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "pmac.h" + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +/* + * Calibrate the decrementer frequency with the VIA timer 1. + */ +#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ + +/* VIA registers */ +#define RS 0x200 /* skip between registers */ +#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */ +#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */ +#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */ +#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */ +#define ACR (11*RS) /* Auxiliary control register */ +#define IFR (13*RS) /* Interrupt flag register */ + +/* Bits in ACR */ +#define T1MODE 0xc0 /* Timer 1 mode */ +#define T1MODE_CONT 0x40 /* continuous interrupts */ + +/* Bits in IFR and IER */ +#define T1_INT 0x40 /* Timer 1 interrupt */ + +long __init pmac_time_init(void) +{ + s32 delta = 0; +#if defined(CONFIG_NVRAM) && defined(CONFIG_PPC32) + int dst; + + delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16; + delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8; + delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb); + if (delta & 0x00800000UL) + delta |= 0xFF000000UL; + dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); + printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, + dst ? "on" : "off"); +#endif + return delta; +} + +#ifdef CONFIG_PMAC_SMU +static time64_t smu_get_time(void) +{ + struct rtc_time tm; + + if (smu_get_rtc_time(&tm, 1)) + return 0; + return rtc_tm_to_time64(&tm); +} +#endif + +/* Can't be __init, it's called when suspending and resuming */ +time64_t pmac_get_boot_time(void) +{ + /* Get the time from the RTC, used only at boot time */ + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + return cuda_get_time(); +#endif +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + return pmu_get_time(); +#endif +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + return smu_get_time(); +#endif + default: + return 0; + } +} + +void pmac_get_rtc_time(struct rtc_time *tm) +{ + /* Get the time from the RTC, used only at boot time */ + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + rtc_time64_to_tm(cuda_get_time(), tm); + break; +#endif +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + rtc_time64_to_tm(pmu_get_time(), tm); + break; +#endif +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + smu_get_rtc_time(tm, 1); + break; +#endif + default: + ; + } +} + +int pmac_set_rtc_time(struct rtc_time *tm) +{ + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + return cuda_set_rtc_time(tm); +#endif +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + return pmu_set_rtc_time(tm); +#endif +#ifdef CONFIG_PMAC_SMU + case SYS_CTRLER_SMU: + return smu_set_rtc_time(tm, 1); +#endif + default: + return -ENODEV; + } +} + +#ifdef CONFIG_PPC32 +/* + * Calibrate the decrementer register using VIA timer 1. + * This is used both on powermacs and CHRP machines. + */ +static int __init via_calibrate_decr(void) +{ + struct device_node *vias; + volatile unsigned char __iomem *via; + int count = VIA_TIMER_FREQ_6 / 100; + unsigned int dstart, dend; + struct resource rsrc; + + vias = of_find_node_by_name(NULL, "via-cuda"); + if (vias == NULL) + vias = of_find_node_by_name(NULL, "via-pmu"); + if (vias == NULL) + vias = of_find_node_by_name(NULL, "via"); + if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) { + of_node_put(vias); + return 0; + } + of_node_put(vias); + via = early_ioremap(rsrc.start, resource_size(&rsrc)); + if (via == NULL) { + printk(KERN_ERR "Failed to map VIA for timer calibration !\n"); + return 0; + } + + /* set timer 1 for continuous interrupts */ + out_8(&via[ACR], (via[ACR] & ~T1MODE) | T1MODE_CONT); + /* set the counter to a small value */ + out_8(&via[T1CH], 2); + /* set the latch to `count' */ + out_8(&via[T1LL], count); + out_8(&via[T1LH], count >> 8); + /* wait until it hits 0 */ + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dstart = get_dec(); + /* clear the interrupt & wait until it hits 0 again */ + in_8(&via[T1CL]); + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dend = get_dec(); + + ppc_tb_freq = (dstart - dend) * 100 / 6; + + early_iounmap((void *)via, resource_size(&rsrc)); + + return 1; +} +#endif + +/* + * Query the OF and get the decr frequency. + */ +void __init pmac_calibrate_decr(void) +{ + generic_calibrate_decr(); + +#ifdef CONFIG_PPC32 + /* We assume MacRISC2 machines have correct device-tree + * calibration. That's better since the VIA itself seems + * to be slightly off. --BenH + */ + if (!of_machine_is_compatible("MacRISC2") && + !of_machine_is_compatible("MacRISC3") && + !of_machine_is_compatible("MacRISC4")) + if (via_calibrate_decr()) + return; + + /* Special case: QuickSilver G4s seem to have a badly calibrated + * timebase-frequency in OF, VIA is much better on these. We should + * probably implement calibration based on the KL timer on these + * machines anyway... -BenH + */ + if (of_machine_is_compatible("PowerMac3,5")) + if (via_calibrate_decr()) + return; +#endif +} diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c new file mode 100644 index 0000000000..b4756defd5 --- /dev/null +++ b/arch/powerpc/platforms/powermac/udbg_adb.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This implementation is "special", it can "patch" the current + * udbg implementation and work on top of it. It must thus be + * initialized last + */ + +static void (*udbg_adb_old_putc)(char c); +static int (*udbg_adb_old_getc)(void); +static int (*udbg_adb_old_getc_poll)(void); + +static enum { + input_adb_none, + input_adb_pmu, + input_adb_cuda, +} input_type = input_adb_none; + +int xmon_wants_key, xmon_adb_keycode; + +static inline void udbg_adb_poll(void) +{ +#ifdef CONFIG_ADB_PMU + if (input_type == input_adb_pmu) + pmu_poll_adb(); +#endif /* CONFIG_ADB_PMU */ +#ifdef CONFIG_ADB_CUDA + if (input_type == input_adb_cuda) + cuda_poll(); +#endif /* CONFIG_ADB_CUDA */ +} + +#ifdef CONFIG_BOOTX_TEXT + +static int udbg_adb_use_btext; +static int xmon_adb_shiftstate; + +static unsigned char xmon_keytab[128] = + "asdfhgzxcv\000bqwer" /* 0x00 - 0x0f */ + "yt123465=97-80]o" /* 0x10 - 0x1f */ + "u[ip\rlj'k;\\,/nm." /* 0x20 - 0x2f */ + "\t `\177\0\033\0\0\0\0\0\0\0\0\0\0" /* 0x30 - 0x3f */ + "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */ + "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */ + +static unsigned char xmon_shift_keytab[128] = + "ASDFHGZXCV\000BQWER" /* 0x00 - 0x0f */ + "YT!@#$^%+(&_*)}O" /* 0x10 - 0x1f */ + "U{IP\rLJ\"K:|" /* 0x20 - 0x2f */ + "\t ~\177\0\033\0\0\0\0\0\0\0\0\0\0" /* 0x30 - 0x3f */ + "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */ + "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */ + +static int udbg_adb_local_getc(void) +{ + int k, t, on; + + xmon_wants_key = 1; + for (;;) { + xmon_adb_keycode = -1; + t = 0; + on = 0; + k = -1; + do { + if (--t < 0) { + on = 1 - on; + btext_drawchar(on? 0xdb: 0x20); + btext_drawchar('\b'); + t = 200000; + } + udbg_adb_poll(); + if (udbg_adb_old_getc_poll) + k = udbg_adb_old_getc_poll(); + } while (k == -1 && xmon_adb_keycode == -1); + if (on) + btext_drawstring(" \b"); + if (k != -1) + return k; + k = xmon_adb_keycode; + + /* test for shift keys */ + if ((k & 0x7f) == 0x38 || (k & 0x7f) == 0x7b) { + xmon_adb_shiftstate = (k & 0x80) == 0; + continue; + } + if (k >= 0x80) + continue; /* ignore up transitions */ + k = (xmon_adb_shiftstate? xmon_shift_keytab: xmon_keytab)[k]; + if (k != 0) + break; + } + xmon_wants_key = 0; + return k; +} +#endif /* CONFIG_BOOTX_TEXT */ + +static int udbg_adb_getc(void) +{ +#ifdef CONFIG_BOOTX_TEXT + if (udbg_adb_use_btext && input_type != input_adb_none) + return udbg_adb_local_getc(); +#endif + if (udbg_adb_old_getc) + return udbg_adb_old_getc(); + return -1; +} + +/* getc_poll() is not really used, unless you have the xmon-over modem + * hack that doesn't quite concern us here, thus we just poll the low level + * ADB driver to prevent it from timing out and call back the original poll + * routine. + */ +static int udbg_adb_getc_poll(void) +{ + udbg_adb_poll(); + + if (udbg_adb_old_getc_poll) + return udbg_adb_old_getc_poll(); + return -1; +} + +static void udbg_adb_putc(char c) +{ +#ifdef CONFIG_BOOTX_TEXT + if (udbg_adb_use_btext) + btext_drawchar(c); +#endif + if (udbg_adb_old_putc) + return udbg_adb_old_putc(c); +} + +void __init udbg_adb_init_early(void) +{ +#ifdef CONFIG_BOOTX_TEXT + if (btext_find_display(1) == 0) { + udbg_adb_use_btext = 1; + udbg_putc = udbg_adb_putc; + } +#endif +} + +int __init udbg_adb_init(int force_btext) +{ + struct device_node *np; + + /* Capture existing callbacks */ + udbg_adb_old_putc = udbg_putc; + udbg_adb_old_getc = udbg_getc; + udbg_adb_old_getc_poll = udbg_getc_poll; + + /* Check if our early init was already called */ + if (udbg_adb_old_putc == udbg_adb_putc) + udbg_adb_old_putc = NULL; +#ifdef CONFIG_BOOTX_TEXT + if (udbg_adb_old_putc == btext_drawchar) + udbg_adb_old_putc = NULL; +#endif + + /* Set ours as output */ + udbg_putc = udbg_adb_putc; + udbg_getc = udbg_adb_getc; + udbg_getc_poll = udbg_adb_getc_poll; + +#ifdef CONFIG_BOOTX_TEXT + /* Check if we should use btext output */ + if (btext_find_display(force_btext) == 0) + udbg_adb_use_btext = 1; +#endif + + /* See if there is a keyboard in the device tree with a parent + * of type "adb". If not, we return a failure, but we keep the + * bext output set for now + */ + for_each_node_by_name(np, "keyboard") { + struct device_node *parent = of_get_parent(np); + int found = of_node_is_type(parent, "adb"); + of_node_put(parent); + if (found) + break; + } + if (np == NULL) + return -ENODEV; + of_node_put(np); + +#ifdef CONFIG_ADB_PMU + if (find_via_pmu()) + input_type = input_adb_pmu; +#endif +#ifdef CONFIG_ADB_CUDA + if (find_via_cuda()) + input_type = input_adb_cuda; +#endif + + /* Same as above: nothing found, keep btext set for output */ + if (input_type == input_adb_none) + return -ENODEV; + + return 0; +} diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c new file mode 100644 index 0000000000..1b7c39e841 --- /dev/null +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * udbg for zilog scc ports as found on Apple PowerMacs + * + * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp + */ +#include +#include +#include +#include +#include +#include + +extern u8 real_readb(volatile u8 __iomem *addr); +extern void real_writeb(u8 data, volatile u8 __iomem *addr); + +#define SCC_TXRDY 4 +#define SCC_RXRDY 1 + +static volatile u8 __iomem *sccc; +static volatile u8 __iomem *sccd; + +static void udbg_scc_putc(char c) +{ + if (sccc) { + while ((in_8(sccc) & SCC_TXRDY) == 0) + ; + out_8(sccd, c); + if (c == '\n') + udbg_scc_putc('\r'); + } +} + +static int udbg_scc_getc_poll(void) +{ + if (sccc) { + if ((in_8(sccc) & SCC_RXRDY) != 0) + return in_8(sccd); + else + return -1; + } + return -1; +} + +static int udbg_scc_getc(void) +{ + if (sccc) { + while ((in_8(sccc) & SCC_RXRDY) == 0) + ; + return in_8(sccd); + } + return -1; +} + +static unsigned char scc_inittab[] = { + 13, 0, /* set baud rate divisor */ + 12, 0, + 14, 1, /* baud rate gen enable, src=rtxc */ + 11, 0x50, /* clocks = br gen */ + 5, 0xea, /* tx 8 bits, assert DTR & RTS */ + 4, 0x46, /* x16 clock, 1 stop */ + 3, 0xc1, /* rx enable, 8 bits */ +}; + +void __init udbg_scc_init(int force_scc) +{ + const u32 *reg; + unsigned long addr; + struct device_node *stdout = NULL, *escc = NULL, *macio = NULL; + struct device_node *ch, *ch_def = NULL, *ch_a = NULL; + const char *path; + int i; + + escc = of_find_node_by_name(NULL, "escc"); + if (escc == NULL) + goto bail; + macio = of_get_parent(escc); + if (macio == NULL) + goto bail; + path = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (path != NULL) + stdout = of_find_node_by_path(path); + for_each_child_of_node(escc, ch) { + if (ch == stdout) { + of_node_put(ch_def); + ch_def = of_node_get(ch); + } + if (of_node_name_eq(ch, "ch-a")) { + of_node_put(ch_a); + ch_a = of_node_get(ch); + } + } + if (ch_def == NULL && !force_scc) + goto bail; + + ch = ch_def ? ch_def : ch_a; + + /* Get address within mac-io ASIC */ + reg = of_get_property(escc, "reg", NULL); + if (reg == NULL) + goto bail; + addr = reg[0]; + + /* Get address of mac-io PCI itself */ + reg = of_get_property(macio, "assigned-addresses", NULL); + if (reg == NULL) + goto bail; + addr += reg[2]; + + /* Lock the serial port */ + pmac_call_feature(PMAC_FTR_SCC_ENABLE, ch, + PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1); + + if (ch == ch_a) + addr += 0x20; + sccc = ioremap(addr & PAGE_MASK, PAGE_SIZE) ; + sccc += addr & ~PAGE_MASK; + sccd = sccc + 0x10; + + mb(); + + for (i = 20000; i != 0; --i) + in_8(sccc); + out_8(sccc, 0x09); /* reset A or B side */ + out_8(sccc, 0xc0); + + /* If SCC was the OF output port, read the BRG value, else + * Setup for 38400 or 57600 8N1 depending on the machine + */ + if (ch_def != NULL) { + out_8(sccc, 13); + scc_inittab[1] = in_8(sccc); + out_8(sccc, 12); + scc_inittab[3] = in_8(sccc); + } else if (of_machine_is_compatible("RackMac1,1") + || of_machine_is_compatible("RackMac1,2") + || of_machine_is_compatible("MacRISC4")) { + /* Xserves and G5s default to 57600 */ + scc_inittab[1] = 0; + scc_inittab[3] = 0; + } else { + /* Others default to 38400 */ + scc_inittab[1] = 0; + scc_inittab[3] = 1; + } + + for (i = 0; i < sizeof(scc_inittab); ++i) + out_8(sccc, scc_inittab[i]); + + + udbg_putc = udbg_scc_putc; + udbg_getc = udbg_scc_getc; + udbg_getc_poll = udbg_scc_getc_poll; + + udbg_puts("Hello World !\n"); + + bail: + of_node_put(macio); + of_node_put(escc); + of_node_put(stdout); + of_node_put(ch_def); + of_node_put(ch_a); +} + +#ifdef CONFIG_PPC64 +static void udbg_real_scc_putc(char c) +{ + while ((real_readb(sccc) & SCC_TXRDY) == 0) + ; + real_writeb(c, sccd); + if (c == '\n') + udbg_real_scc_putc('\r'); +} + +void __init udbg_init_pmac_realmode(void) +{ + sccc = (volatile u8 __iomem *)0x80013020ul; + sccd = (volatile u8 __iomem *)0x80013030ul; + + udbg_putc = udbg_real_scc_putc; + udbg_getc = NULL; + udbg_getc_poll = NULL; +} +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig new file mode 100644 index 0000000000..70a46acc70 --- /dev/null +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_POWERNV + depends on PPC64 && PPC_BOOK3S + bool "IBM PowerNV (Non-Virtualized) platform support" + select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU + select PPC_XICS + select PPC_ICP_NATIVE + select PPC_XIVE_NATIVE + select PPC_P7_NAP + select FORCE_PCI + select PCI_MSI + select EPAPR_BOOT + select PPC_INDIRECT_PIO + select PPC_UDBG_16550 + select CPU_FREQ + select PPC_DOORBELL + select MMU_NOTIFIER + select FORCE_SMP + select ARCH_SUPPORTS_PER_VMA_LOCK + default y + +config OPAL_PRD + tristate "OPAL PRD driver" + depends on PPC_POWERNV + help + This enables the opal-prd driver, a facility to run processor + recovery diagnostics on OpenPower machines + +config PPC_MEMTRACE + bool "Enable runtime allocation of RAM for tracing" + depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC + help + Enabling this option allows for runtime allocation of memory (RAM) + for hardware tracing. + +config SCOM_DEBUGFS + bool "Expose SCOM controllers via debugfs" + depends on DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile new file mode 100644 index 0000000000..19f0fc5c6f --- /dev/null +++ b/arch/powerpc/platforms/powernv/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 + +# nothing that deals with real mode is safe to KASAN +# in particular, idle code runs a bunch of things in real mode +KASAN_SANITIZE_idle.o := n +KASAN_SANITIZE_pci-ioda.o := n +KASAN_SANITIZE_pci-ioda-tce.o := n +# pnv_machine_check_early +KASAN_SANITIZE_setup.o := n + +obj-y += setup.o opal-call.o opal-wrappers.o opal.o opal-async.o +obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o +obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o +obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o +obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o +obj-y += ultravisor.o + +obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o +obj-$(CONFIG_FA_DUMP) += opal-fadump.o +obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o +obj-$(CONFIG_OPAL_CORE) += opal-core.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o +obj-$(CONFIG_PCI_IOV) += pci-sriov.o +obj-$(CONFIG_CXL_BASE) += pci-cxl.o +obj-$(CONFIG_EEH) += eeh-powernv.o +obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o +obj-$(CONFIG_OPAL_PRD) += opal-prd.o +obj-$(CONFIG_PERF_EVENTS) += opal-imc.o +obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o +obj-$(CONFIG_OCXL_BASE) += ocxl.o +obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o +obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h new file mode 100644 index 0000000000..f063807eda --- /dev/null +++ b/arch/powerpc/platforms/powernv/copy-paste.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2016-17 IBM Corp. + */ +#include +#include + +/* + * Copy/paste instructions: + * + * copy RA,RB + * Copy contents of address (RA) + effective_address(RB) + * to internal copy-buffer. + * + * paste RA,RB + * Paste contents of internal copy-buffer to the address + * (RA) + effective_address(RB) + */ +static inline int vas_copy(void *crb, int offset) +{ + asm volatile(PPC_COPY(%0, %1)";" + : + : "b" (offset), "b" (crb) + : "memory"); + + return 0; +} + +static inline int vas_paste(void *paste_address, int offset) +{ + u32 cr; + + cr = 0; + asm volatile(PPC_PASTE(%1, %2)";" + "mfocrf %0, 0x80;" + : "=r" (cr) + : "b" (offset), "b" (paste_address) + : "memory", "cr0"); + + /* We mask with 0xE to ignore SO */ + return (cr >> CR0_SHIFT) & 0xE; +} diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c new file mode 100644 index 0000000000..af3a5d37a1 --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -0,0 +1,1696 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV Platform dependent EEH operations + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powernv.h" +#include "pci.h" +#include "../../../../drivers/pci/pci.h" + +static int eeh_event_irq = -EINVAL; + +static void pnv_pcibios_bus_add_device(struct pci_dev *pdev) +{ + dev_dbg(&pdev->dev, "EEH: Setting up device\n"); + eeh_probe_device(pdev); +} + +static irqreturn_t pnv_eeh_event(int irq, void *data) +{ + /* + * We simply send a special EEH event if EEH has been + * enabled. We don't care about EEH events until we've + * finished processing the outstanding ones. Event processing + * gets unmasked in next_error() if EEH is enabled. + */ + disable_irq_nosync(irq); + + if (eeh_enabled()) + eeh_send_failure_event(NULL); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_DEBUG_FS +static ssize_t pnv_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!eeh_ops || !eeh_ops->err_inject) + return -ENXIO; + + /* Copy over argument buffer */ + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + pe = eeh_pe_get(hose, pe_no); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations pnv_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = pnv_eeh_ei_write, +}; + +static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + offset, val); + return 0; +} + +static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + offset); + return 0; +} + +#define PNV_EEH_DBGFS_ENTRY(name, reg) \ +static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \ +{ \ + return pnv_eeh_dbgfs_set(data, reg, val); \ +} \ + \ +static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \ +{ \ + return pnv_eeh_dbgfs_get(data, reg, val); \ +} \ + \ +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \ + pnv_eeh_dbgfs_get_##name, \ + pnv_eeh_dbgfs_set_##name, \ + "0x%llx\n") + +PNV_EEH_DBGFS_ENTRY(outb, 0xD10); +PNV_EEH_DBGFS_ENTRY(inbA, 0xD90); +PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); + +#endif /* CONFIG_DEBUG_FS */ + +static void pnv_eeh_enable_phbs(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + /* + * If EEH is enabled, we're going to rely on that. + * Otherwise, we restore to conventional mechanism + * to clear frozen PE during PCI config access. + */ + if (eeh_enabled()) + phb->flags |= PNV_PHB_FLAG_EEH; + else + phb->flags &= ~PNV_PHB_FLAG_EEH; + } +} + +/** + * pnv_eeh_post_init - EEH platform dependent post initialization + * + * EEH platform dependent post initialization on powernv. When + * the function is called, the EEH PEs and devices should have + * been built. If the I/O cache staff has been built, EEH is + * ready to supply service. + */ +int pnv_eeh_post_init(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + int ret = 0; + + eeh_show_enabled(); + + /* Register OPAL event notifier */ + eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); + if (eeh_event_irq < 0) { + pr_err("%s: Can't register OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return eeh_event_irq; + } + + ret = request_irq(eeh_event_irq, pnv_eeh_event, + IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); + if (ret < 0) { + irq_dispose_mapping(eeh_event_irq); + pr_err("%s: Can't request OPAL event interrupt (%d)\n", + __func__, eeh_event_irq); + return ret; + } + + if (!eeh_enabled()) + disable_irq(eeh_event_irq); + + pnv_eeh_enable_phbs(); + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + /* Create debugfs entries */ +#ifdef CONFIG_DEBUG_FS + if (phb->has_dbgfs || !phb->dbgfs) + continue; + + phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &pnv_eeh_ei_fops); + + debugfs_create_file("err_injct_outbound", 0600, + phb->dbgfs, hose, + &pnv_eeh_dbgfs_ops_outb); + debugfs_create_file("err_injct_inboundA", 0600, + phb->dbgfs, hose, + &pnv_eeh_dbgfs_ops_inbA); + debugfs_create_file("err_injct_inboundB", 0600, + phb->dbgfs, hose, + &pnv_eeh_dbgfs_ops_inbB); +#endif /* CONFIG_DEBUG_FS */ + } + + return ret; +} + +static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) +{ + int pos = PCI_CAPABILITY_LIST; + int cnt = 48; /* Maximal number of capabilities */ + u32 status, id; + + if (!pdn) + return 0; + + /* Check if the device supports capabilities */ + pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + while (cnt--) { + pnv_pci_cfg_read(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + + pos &= ~3; + pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + + /* Found */ + if (id == cap) + return pos; + + /* Next one */ + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + +static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 header; + int pos = 256, ttl = (4096 - 256) / 8; + + if (!edev || !edev->pcie_cap) + return 0; + if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + return 0; + else if (!header) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap && pos) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 256) + break; + + if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + +static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev) +{ + struct pci_controller *hose = pdev->bus->sysdata; + struct pnv_phb *phb = hose->private_data; + struct pci_dev *parent = pdev->bus->self; + +#ifdef CONFIG_PCI_IOV + /* for VFs we use the PF's PE as the upstream PE */ + if (pdev->is_virtfn) + parent = pdev->physfn; +#endif + + /* otherwise use the PE of our parent bridge */ + if (parent) { + struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent); + + return eeh_pe_get(phb->hose, ioda_pe->pe_number); + } + + return NULL; +} + +/** + * pnv_eeh_probe - Do probe on PCI device + * @pdev: pci_dev to probe + * + * Create, or find the existing, eeh_dev for this pci_dev. + */ +static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pci_controller *hose = pdn->phb; + struct pnv_phb *phb = hose->private_data; + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + struct eeh_pe *upstream_pe; + uint32_t pcie_flags; + int ret; + int config_addr = (pdn->busno << 8) | (pdn->devfn); + + /* + * When probing the root bridge, which doesn't have any + * subordinate PCI devices. We don't have OF node for + * the root bridge. So it's not reasonable to continue + * the probing. + */ + if (!edev || edev->pe) + return NULL; + + /* already configured? */ + if (edev->pdev) { + pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n", + __func__, hose->global_number, config_addr >> 8, + PCI_SLOT(config_addr), PCI_FUNC(config_addr)); + return edev; + } + + /* Skip for PCI-ISA bridge */ + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) + return NULL; + + eeh_edev_dbg(edev, "Probing device\n"); + + /* Initialize eeh device */ + edev->mode &= 0xFFFFFF00; + edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); + edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); + if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) { + edev->mode |= EEH_DEV_BRIDGE; + if (edev->pcie_cap) { + pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + } + + edev->pe_config_addr = phb->ioda.pe_rmap[config_addr]; + + upstream_pe = pnv_eeh_get_upstream_pe(pdev); + + /* Create PE */ + ret = eeh_pe_tree_insert(edev, upstream_pe); + if (ret) { + eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret); + return NULL; + } + + /* + * If the PE contains any one of following adapters, the + * PCI config space can't be accessed when dumping EEH log. + * Otherwise, we will run into fenced PHB caused by shortage + * of outbound credits in the adapter. The PCI config access + * should be blocked until PE reset. MMIO access is dropped + * by hardware certainly. In order to drop PCI config requests, + * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which + * will be checked in the backend for PE state retrieval. If + * the PE becomes frozen for the first time and the flag has + * been set for the PE, we will set EEH_PE_CFG_BLOCKED for + * that PE to block its config space. + * + * Broadcom BCM5718 2-ports NICs (14e4:1656) + * Broadcom Austin 4-ports NICs (14e4:1657) + * Broadcom Shiner 4-ports 1G NICs (14e4:168a) + * Broadcom Shiner 2-ports 10G NICs (14e4:168e) + */ + if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x1656) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x1657) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x168a) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x168e)) + edev->pe->state |= EEH_PE_CFG_RESTRICTED; + + /* + * Cache the PE primary bus, which can't be fetched when + * full hotplug is in progress. In that case, all child + * PCI devices of the PE are expected to be removed prior + * to PE reset. + */ + if (!(edev->pe->state & EEH_PE_PRI_BUS)) { + edev->pe->bus = pci_find_bus(hose->global_number, + pdn->busno); + if (edev->pe->bus) + edev->pe->state |= EEH_PE_PRI_BUS; + } + + /* + * Enable EEH explicitly so that we will do EEH check + * while accessing I/O stuff + */ + if (!eeh_has_flag(EEH_ENABLED)) { + enable_irq(eeh_event_irq); + pnv_eeh_enable_phbs(); + eeh_add_flag(EEH_ENABLED); + } + + /* Save memory bars */ + eeh_save_bars(edev); + + eeh_edev_dbg(edev, "EEH enabled on device\n"); + + return edev; +} + +/** + * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @pe: EEH PE + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. + * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int pnv_eeh_set_option(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + bool freeze_pe = false; + int opt; + s64 rc; + + switch (option) { + case EEH_OPT_DISABLE: + return -EPERM; + case EEH_OPT_ENABLE: + return 0; + case EEH_OPT_THAW_MMIO: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; + break; + case EEH_OPT_THAW_DMA: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; + break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + opt = OPAL_EEH_ACTION_SET_FREEZE_ALL; + break; + default: + pr_warn("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } + + /* Freeze master and slave PEs if PHB supports compound PEs */ + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + return 0; + } + + rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe->addr); + return -EIO; + } + + return 0; + } + + /* Unfreeze master and slave PEs if PHB supports */ + if (phb->unfreeze_pe) + return phb->unfreeze_pe(phb, pe->addr, opt); + + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n", + __func__, rc, option, phb->hose->global_number, + pe->addr); + return -EIO; + } + + return 0; +} + +static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + s64 rc; + + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, + phb->diag_data_size); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", + __func__, rc, pe->phb->global_number); +} + +static int pnv_eeh_get_phb_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate = 0; + __be16 pcierr = 0; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* + * Check PHB state. If the PHB is frozen for the + * first time, to dump the PHB diag-data. + */ + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_mark_isolated(pe); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + +static int pnv_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate = 0; + __be16 pcierr = 0; + s64 rc; + int result; + + /* + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. + */ + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + return result; + } + + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, + pe->addr); + return EEH_STATE_NOT_SUPPORT; + } + } + + /* Figure out state */ + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result = 0; + break; + case OPAL_EEH_STOPPED_RESET: + result = EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result = EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result = EEH_STATE_NOT_SUPPORT; + break; + default: + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); + } + + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && + !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + + eeh_pe_mark_isolated(pe); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + +/** + * pnv_eeh_get_state - Retrieve PE state + * @pe: EEH PE + * @delay: delay while PE state is temporarily unavailable + * + * Retrieve the state of the specified PE. For IODA-compitable + * platform, it should be retrieved from IODA table. Therefore, + * we prefer passing down to hardware implementation to handle + * it. + */ +static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + int ret; + + if (pe->type & EEH_PE_PHB) + ret = pnv_eeh_get_phb_state(pe); + else + ret = pnv_eeh_get_pe_state(pe); + + if (!delay) + return ret; + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + + return ret; +} + +static s64 pnv_eeh_poll(unsigned long id) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(id); + if (rc <= 0) + break; + + if (system_state < SYSTEM_RUNNING) + udelay(1000 * rc); + else + msleep(rc); + } + + return rc; +} + +int pnv_eeh_phb_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* + * Poll state of the PHB until the request is done + * successfully. The PHB reset is usually PHB complete + * reset followed by hot reset on root bus. So we also + * need the PCI bus settlement delay. + */ + if (rc > 0) + rc = pnv_eeh_poll(phb->opal_id); + if (option == EEH_RESET_DEACTIVATE) { + if (system_state < SYSTEM_RUNNING) + udelay(1000 * EEH_PE_RST_SETTLE_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + } +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int pnv_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_FUNDAMENTAL, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + if (rc > 0) + rc = pnv_eeh_poll(phb->opal_id); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) +{ + struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int aer = edev ? edev->aer_cap : 0; + u32 ctrl; + + pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n", + __func__, pci_domain_nr(dev->bus), + dev->bus->number, option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + /* Don't report linkDown event */ + if (aer) { + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl |= PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_SETTLE_TIME); + + /* Continue reporting linkDown event */ + if (aer) { + eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl &= ~PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + + break; + } + + return 0; +} + +static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct device_node *dn = pci_device_to_OF_node(pdev); + uint64_t id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev)); + uint8_t scope; + int64_t rc; + + /* Hot reset to the bus if firmware cannot handle */ + if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL)) + return __pnv_eeh_bridge_reset(pdev, option); + + pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n", + __func__, pci_domain_nr(pdev->bus), + pdev->bus->number, option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + scope = OPAL_RESET_PCI_FUNDAMENTAL; + break; + case EEH_RESET_HOT: + scope = OPAL_RESET_PCI_HOT; + break; + case EEH_RESET_DEACTIVATE: + return 0; + default: + dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n", + __func__, option); + return -EINVAL; + } + + rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET); + if (rc <= OPAL_SUCCESS) + goto out; + + rc = pnv_eeh_poll(id); +out: + return (rc == OPAL_SUCCESS) ? 0 : -EIO; +} + +void pnv_pci_reset_secondary_bus(struct pci_dev *dev) +{ + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + pnv_eeh_root_reset(hose, EEH_RESET_HOT); + pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } +} + +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, + int pos, u16 mask) +{ + struct eeh_dev *edev = pdn->edev; + int i, status = 0; + + /* Wait for Transaction Pending bit to be cleared */ + for (i = 0; i < 4; i++) { + eeh_ops->read_config(edev, pos, 2, &status); + if (!(status & mask)) + return; + + msleep((1 << i) * 100); + } + + pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", + __func__, type, + pdn->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); +} + +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 reg = 0; + + if (WARN_ON(!edev->pcie_cap)) + return -ENOTTY; + + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + if (!(reg & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + pnv_eeh_wait_for_pending(pdn, "", + edev->pcie_cap + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_TRPND); + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg |= PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 cap = 0; + + if (WARN_ON(!edev->af_cap)) + return -ENOTTY; + + eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Wait for Transaction Pending bit to clear. A word-aligned + * test is used, so we use the control offset rather than status + * and shift the test bit to match. + */ + pnv_eeh_wait_for_pending(pdn, "AF", + edev->af_cap + PCI_AF_CTRL, + PCI_AF_STATUS_TP << 8); + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, + 1, PCI_AF_CTRL_FLR); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + int ret; + + /* The VF PE should have only one child device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pdn = eeh_dev_to_pdn(edev); + if (!pdn) + return -ENXIO; + + ret = pnv_eeh_do_flr(pdn, option); + if (!ret) + return ret; + + return pnv_eeh_do_af_flr(pdn, option); +} + +/** + * pnv_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. + */ +static int pnv_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb; + struct pci_bus *bus; + int64_t rc; + + /* + * For PHB reset, we always have complete reset. For those PEs whose + * primary bus derived from root complex (root bus) or root port + * (usually bus#1), we apply hot or fundamental reset on the root port. + * For other PEs, we always have hot reset on the PE primary bus. + * + * Here, we have different design to pHyp, which always clear the + * frozen state during PE reset. However, the good idea here from + * benh is to keep frozen state before we get PE reset done completely + * (until BAR restore). With the frozen state, HW drops illegal IO + * or MMIO access, which can incur recursive frozen PE during PE + * reset. The side effect is that EEH core has to clear the frozen + * state explicitly after BAR restore. + */ + if (pe->type & EEH_PE_PHB) + return pnv_eeh_phb_reset(hose, option); + + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing error injection registers\n", + __func__, rc); + return -EIO; + } + } + + if (pe->type & EEH_PE_VF) + return pnv_eeh_reset_vf_pe(pe, option); + + bus = eeh_pe_bus_get(pe); + if (!bus) { + pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", + __func__, pe->phb->global_number, pe->addr); + return -EIO; + } + + if (pci_is_root_bus(bus)) + return pnv_eeh_root_reset(hose, option); + + /* + * For hot resets try use the generic PCI error recovery reset + * functions. These correctly handles the case where the secondary + * bus is behind a hotplug slot and it will use the slot provided + * reset methods to prevent spurious hotplug events during the reset. + * + * Fundamental resets need to be handled internally to EEH since the + * PCI core doesn't really have a concept of a fundamental reset, + * mainly because there's no standard way to generate one. Only a + * few devices require an FRESET so it should be fine. + */ + if (option != EEH_RESET_FUNDAMENTAL) { + /* + * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the + * de-assert step. It's like the OPAL reset API was + * poorly designed or something... + */ + if (option == EEH_RESET_DEACTIVATE) + return 0; + + rc = pci_bus_error_reset(bus->self); + if (!rc) + return 0; + } + + /* otherwise, use the generic bridge reset. this might call into FW */ + if (pci_is_root_bus(bus->parent)) + return pnv_eeh_root_reset(hose, option); + return pnv_eeh_bridge_reset(bus->self, option); +} + +/** + * pnv_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + */ +static int pnv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + + return 0; +} + +/** + * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @pe: EEH PE + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pnv_eeh_configure_bridge(struct eeh_pe *pe) +{ + return 0; +} + +/** + * pnv_pe_err_inject - Inject specified error to the indicated PE + * @pe: the indicated PE + * @type: error type + * @func: specific error type + * @addr: address + * @mask: address mask + * + * The routine is called to inject specified error, which is + * determined by @type and @func, to the indicated PE for + * testing purpose. + */ +static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + s64 rc; + + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } + + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } + + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + rc = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, rc, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; +} + +static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + + if (!edev || !edev->pe) + return false; + + /* + * We will issue FLR or AF FLR to all VFs, which are contained + * in VF PE. It relies on the EEH PCI config accessors. So we + * can't block them during the window. + */ + if (edev->physfn && (edev->pe->state & EEH_PE_RESET)) + return false; + + if (edev->pe->state & EEH_PE_CFG_BLOCKED) + return true; + + return false; +} + +static int pnv_eeh_read_config(struct eeh_dev *edev, + int where, int size, u32 *val) +{ + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) { + *val = 0xFFFFFFFF; + return PCIBIOS_SET_FAILED; + } + + return pnv_pci_cfg_read(pdn, where, size, val); +} + +static int pnv_eeh_write_config(struct eeh_dev *edev, + int where, int size, u32 val) +{ + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) + return PCIBIOS_SET_FAILED; + + return pnv_pci_cfg_write(pdn, where, size, val); +} + +static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); + + /* LEM */ + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); +} + +static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data = + (struct OpalIoP7IOCErrorData*)phb->diag_data; + long rc; + + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (be16_to_cpu(data->type)) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? "Downbound" : "Upbound"); + pnv_eeh_dump_hub_diag_common(data); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\n\n", + data->ci.ciPort); + pnv_eeh_dump_hub_diag_common(data); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + default: + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static int pnv_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + + /* + * If PHB supports compound PE, to fetch + * the master PE because slave PE is invisible + * to EEH core. + */ + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } + + /* Find the PE according to PE# */ + dev_pe = eeh_pe_get(hose, pe_no); + if (!dev_pe) + return -EEXIST; + + /* Freeze the (compound) PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || eeh_state_active(ret)) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + + return 0; +} + +/** + * pnv_eeh_next_error - Retrieve next EEH error to handle + * @pe: Affected PE + * + * The function is expected to be called by EEH core while it gets + * special EEH event (without binding PE). The function calls to + * OPAL APIs for next error to handle. The informational error is + * handled internally by platform. However, the dead IOC, dead PHB, + * fenced PHB and frozen PE should be handled by EEH core eventually. + */ +static int pnv_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + long rc; + int state, ret = EEH_NEXT_ERR_NONE; + + /* + * While running here, it's safe to purge the event queue. The + * event should still be masked. + */ + eeh_remove_event(NULL, false); + + list_for_each_entry(hose, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB has been + * removed or is exactly under error recovery, we + * needn't take care of it any more. + */ + phb = hose->private_data; + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + if (rc != OPAL_SUCCESS) { + pr_devel("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have error, stop processing */ + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { + pr_devel("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Processing the error. We're expecting the error with + * highest priority reported upon multiple errors on the + * specific PHB. + */ + pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", + __func__, be16_to_cpu(err_type), + be16_to_cpu(severity), be64_to_cpu(frozen_pe_no), + hose->global_number); + switch (be16_to_cpu(err_type)) { + case OPAL_EEH_IOC_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { + pr_err("EEH: dead IOC detected\n"); + ret = EEH_NEXT_ERR_DEAD_IOC; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + pnv_eeh_get_and_dump_hub_diag(hose); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PHB_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { + *pe = phb_pe; + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_DEAD_PHB; + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { + *pe = phb_pe; + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FENCED_PHB; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + pnv_eeh_get_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PE_ERROR: + /* + * If we can't find the corresponding PE, we + * just try to unfreeze. + */ + if (pnv_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, be64_to_cpu(frozen_pe_no)); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + + /* Dump PHB diag-data */ + rc = opal_pci_get_phb_diag_data2(phb->opal_id, + phb->diag_data, phb->diag_data_size); + if (rc == OPAL_SUCCESS) + pnv_pci_dump_phb_diag_data(hose, + phb->diag_data); + + /* Try best to clear it */ + opal_pci_eeh_freeze_clear(phb->opal_id, + be64_to_cpu(frozen_pe_no), + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { + ret = EEH_NEXT_ERR_NONE; + } else { + pr_err("EEH: Frozen PE#%x " + "on PHB#%x detected\n", + (*pe)->addr, + (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, " + "PHB location: %s\n", + eeh_pe_loc_get(*pe), + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FROZEN_PE; + } + + break; + default: + pr_warn("%s: Unexpected error type %d\n", + __func__, be16_to_cpu(err_type)); + } + + /* + * EEH core will try recover from fenced PHB or + * frozen PE. In the time for frozen PE, EEH core + * enable IO path for that before collecting logs, + * but it ruins the site. So we have to dump the + * log in advance here. + */ + if ((ret == EEH_NEXT_ERR_FROZEN_PE || + ret == EEH_NEXT_ERR_FENCED_PHB) && + !((*pe)->state & EEH_PE_ISOLATED)) { + eeh_pe_mark_isolated(*pe); + pnv_eeh_get_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); + } + + /* + * We probably have the frozen parent PE out there and + * we need have to handle frozen parent PE firstly. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = eeh_ops->get_state(parent_pe, NULL); + if (state > 0 && !eeh_state_active(state)) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_mark_isolated(*pe); + } + + /* + * If we have no errors on the specific PHB or only + * informative error there, we continue poking it. + * Otherwise, we need actions to be taken by upper + * layer. + */ + if (ret > EEH_NEXT_ERR_INF) + break; + } + + /* Unmask the event */ + if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) + enable_irq(eeh_event_irq); + + return ret; +} + +static int pnv_eeh_restore_config(struct eeh_dev *edev) +{ + struct pnv_phb *phb; + s64 ret = 0; + + if (!edev) + return -EEXIST; + + if (edev->physfn) + return 0; + + phb = edev->controller->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->bdfn); + + if (ret) { + pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", + __func__, edev->bdfn, ret); + return -EIO; + } + + return ret; +} + +static struct eeh_ops pnv_eeh_ops = { + .name = "powernv", + .probe = pnv_eeh_probe, + .set_option = pnv_eeh_set_option, + .get_state = pnv_eeh_get_state, + .reset = pnv_eeh_reset, + .get_log = pnv_eeh_get_log, + .configure_bridge = pnv_eeh_configure_bridge, + .err_inject = pnv_eeh_err_inject, + .read_config = pnv_eeh_read_config, + .write_config = pnv_eeh_write_config, + .next_error = pnv_eeh_next_error, + .restore_config = pnv_eeh_restore_config, + .notify_resume = NULL +}; + +/** + * eeh_powernv_init - Register platform dependent EEH operations + * + * EEH initialization on powernv platform. This function should be + * called before any EEH related functions. + */ +static int __init eeh_powernv_init(void) +{ + int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; + struct pci_controller *hose; + struct pnv_phb *phb; + int ret = -EINVAL; + + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + pr_warn("%s: OPAL is required !\n", __func__); + return -EINVAL; + } + + /* Set probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEV); + + /* + * P7IOC blocks PCI config access to frozen PE, but PHB3 + * doesn't do that. So we have to selectively enable I/O + * prior to collecting error log. + */ + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_P7IOC) + eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + + if (phb->diag_data_size > max_diag_size) + max_diag_size = phb->diag_data_size; + + break; + } + + /* + * eeh_init() allocates the eeh_pe and its aux data buf so the + * size needs to be set before calling eeh_init(). + */ + eeh_set_pe_aux_size(max_diag_size); + ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device; + + ret = eeh_init(&pnv_eeh_ops); + if (!ret) + pr_info("EEH: PowerNV platform initialized\n"); + else + pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); + + return ret; +} +machine_arch_initcall(powernv, eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c new file mode 100644 index 0000000000..ad41dffe4d --- /dev/null +++ b/arch/powerpc/platforms/powernv/idle.c @@ -0,0 +1,1507 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV cpuidle code + * + * Copyright 2015 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powernv.h" +#include "subcore.h" + +/* Power ISA 3.0 allows for stop states 0x0 - 0xF */ +#define MAX_STOP_STATE 0xF + +#define P9_STOP_SPR_MSR 2000 +#define P9_STOP_SPR_PSSCR 855 + +static u32 supported_cpuidle_states; +struct pnv_idle_states_t *pnv_idle_states; +int nr_pnv_idle_states; + +/* + * The default stop state that will be used by ppc_md.power_save + * function on platforms that support stop instruction. + */ +static u64 pnv_default_stop_val; +static u64 pnv_default_stop_mask; +static bool default_stop_found; + +/* + * First stop state levels when SPR and TB loss can occur. + */ +static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; +static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; + +/* + * psscr value and mask of the deepest stop idle state. + * Used when a cpu is offlined. + */ +static u64 pnv_deepest_stop_psscr_val; +static u64 pnv_deepest_stop_psscr_mask; +static u64 pnv_deepest_stop_flag; +static bool deepest_stop_found; + +static unsigned long power7_offline_type; + +static int __init pnv_save_sprs_for_deep_states(void) +{ + int cpu; + int rc; + + /* + * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across + * all cpus at boot. Get these reg values of current cpu and use the + * same across all cpus. + */ + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hmeer_val = mfspr(SPRN_HMEER); + uint64_t msr_val = MSR_IDLE; + uint64_t psscr_val = pnv_deepest_stop_psscr_val; + + for_each_present_cpu(cpu) { + uint64_t pir = get_hard_smp_processor_id(cpu); + uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; + + rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + if (rc != 0) + return rc; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); + if (rc) + return rc; + + rc = opal_slw_set_reg(pir, + P9_STOP_SPR_PSSCR, psscr_val); + + if (rc) + return rc; + } + + /* HIDs are per core registers */ + if (cpu_thread_in_core(cpu) == 0) { + + rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); + if (rc != 0) + return rc; + + /* Only p8 needs to set extra HID registers */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); + if (rc != 0) + return rc; + + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); + if (rc != 0) + return rc; + } + } + } + + return 0; +} + +u32 pnv_get_supported_cpuidle_states(void) +{ + return supported_cpuidle_states; +} +EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); + +static void pnv_fastsleep_workaround_apply(void *info) + +{ + int cpu = smp_processor_id(); + int rc; + int *err = info; + + if (cpu_first_thread_sibling(cpu) != cpu) + return; + + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + if (rc) + *err = 1; +} + +static bool power7_fastsleep_workaround_entry = true; +static bool power7_fastsleep_workaround_exit = true; + +/* + * Used to store fastsleep workaround state + * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) + * 1 - Workaround applied once, never undone. + */ +static u8 fastsleep_workaround_applyonce; + +static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); +} + +static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + int err; + u8 val; + + if (kstrtou8(buf, 0, &val) || val != 1) + return -EINVAL; + + if (fastsleep_workaround_applyonce == 1) + return count; + + /* + * fastsleep_workaround_applyonce = 1 implies + * fastsleep workaround needs to be left in 'applied' state on all + * the cores. Do this by- + * 1. Disable the 'undo' workaround in fastsleep exit path + * 2. Sendi IPIs to all the cores which have at least one online thread + * 3. Disable the 'apply' workaround in fastsleep entry path + * + * There is no need to send ipi to cores which have all threads + * offlined, as last thread of the core entering fastsleep or deeper + * state would have applied workaround. + */ + power7_fastsleep_workaround_exit = false; + + cpus_read_lock(); + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); + cpus_read_unlock(); + if (err) { + pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); + goto fail; + } + + power7_fastsleep_workaround_entry = false; + + fastsleep_workaround_applyonce = 1; + + return count; +fail: + return -EIO; +} + +static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, + show_fastsleep_workaround_applyonce, + store_fastsleep_workaround_applyonce); + +static inline void atomic_start_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + clear_bit(thread_nr, state); +} + +static inline void atomic_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + set_bit(thread_nr, state); +} + +static inline void atomic_lock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *lock = &paca_ptrs[first]->idle_lock; + + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) + barrier(); +} + +static inline void atomic_unlock_and_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long *lock = &paca_ptrs[first]->idle_lock; + u64 s = READ_ONCE(*state); + u64 new, tmp; + + BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); + BUG_ON(s & thread); + +again: + new = s | thread; + tmp = cmpxchg(state, s, new); + if (unlikely(tmp != s)) { + s = tmp; + goto again; + } + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); +} + +static inline void atomic_unlock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *lock = &paca_ptrs[first]->idle_lock; + + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); +} + +/* P7 and P8 */ +struct p7_sprs { + /* per core */ + u64 tscr; + u64 worc; + + /* per subcore */ + u64 sdr1; + u64 rpr; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 purr; + u64 spurr; + u64 dscr; + u64 wort; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 uamor; + /* amor is restored to constant ~0 */ +}; + +static unsigned long power7_idle_insn(unsigned long type) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; + unsigned long srr1; + bool full_winkle; + struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ + bool sprs_saved = false; + int rc; + + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + + BUG_ON(!(*state & thread)); + *state &= ~thread; + + if (power7_fastsleep_workaround_entry) { + if ((*state & core_thread_mask) == 0) { + rc = opal_config_cpu_idle_state( + OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + BUG_ON(rc); + } + } + + if (type == PNV_THREAD_WINKLE) { + sprs.tscr = mfspr(SPRN_TSCR); + sprs.worc = mfspr(SPRN_WORC); + + sprs.sdr1 = mfspr(SPRN_SDR1); + sprs.rpr = mfspr(SPRN_RPR); + + sprs.lpcr = mfspr(SPRN_LPCR); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + } + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.wort = mfspr(SPRN_WORT); + + sprs_saved = true; + + /* + * Increment winkle counter and set all winkle bits if + * all threads are winkling. This allows wakeup side to + * distinguish between fast sleep and winkle state + * loss. Fast sleep still has to resync the timebase so + * this may not be a really big win. + */ + *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) + >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT + == threads_per_core) + *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + } + + atomic_unlock_thread_idle(); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.uamor = mfspr(SPRN_UAMOR); + } + + local_paca->thread_idle_state = type; + srr1 = isa206_idle_insn_mayloss(type); /* go idle */ + local_paca->thread_idle_state = PNV_THREAD_RUNNING; + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + /* + * We don't need an isync after the mtsprs here because + * the upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, ~0); + mtspr(SPRN_UAMOR, sprs.uamor); + } + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + } + atomic_unlock_and_stop_thread_idle(); + } + return srr1; + } + + /* HV state loss */ + BUG_ON(type == PNV_THREAD_NAP); + + atomic_lock_thread_idle(); + + full_winkle = false; + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + full_winkle = true; + BUG_ON(!sprs_saved); + } + } + + WARN_ON(*state & thread); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + if (full_winkle) { + mtspr(SPRN_TSCR, sprs.tscr); + mtspr(SPRN_WORC, sprs.worc); + } + + if (power7_fastsleep_workaround_exit) { + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_UNDO); + BUG_ON(rc); + } + + /* TB */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + +core_woken: + if (!full_winkle) + goto subcore_woken; + + if ((*state & local_paca->subcore_sibling_mask) != 0) + goto subcore_woken; + + /* Per-subcore SPRs */ + mtspr(SPRN_SDR1, sprs.sdr1); + mtspr(SPRN_RPR, sprs.rpr); + +subcore_woken: + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + atomic_unlock_and_stop_thread_idle(); + + /* Fast sleep does not lose SPRs */ + if (!full_winkle) + return srr1; + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + } + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_WORT, sprs.wort); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + +#ifdef CONFIG_PPC_64S_HASH_MMU + /* + * The SLB has to be restored here, but it sometimes still + * contains entries, so the __ variant must be used to prevent + * multi hits. + */ + __slb_restore_bolted_realmode(); +#endif + + return srr1; +} + +extern unsigned long idle_kvm_start_guest(unsigned long srr1); + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long power7_offline(void) +{ + unsigned long srr1; + + mtmsr(MSR_IDLE); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle. */ + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in */ + /* pnv_powersave_wakeup in this file. */ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; +#endif + + __ppc64_runlatch_off(); + srr1 = power7_idle_insn(power7_offline_type); + __ppc64_runlatch_on(); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); +#endif + + mtmsr(MSR_KERNEL); + + return srr1; +} +#endif + +void power7_idle_type(unsigned long type) +{ + unsigned long srr1; + + if (!prep_irq_for_idle_irqsoff()) + return; + + mtmsr(MSR_IDLE); + __ppc64_runlatch_off(); + srr1 = power7_idle_insn(type); + __ppc64_runlatch_on(); + mtmsr(MSR_KERNEL); + + fini_irq_for_idle_irqsoff(); + irq_set_pending_from_srr1(srr1); +} + +static void power7_idle(void) +{ + if (!powersave_nap) + return; + + power7_idle_type(PNV_THREAD_NAP); +} + +struct p9_sprs { + /* per core */ + u64 ptcr; + u64 rpr; + u64 tscr; + u64 ldbar; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 pid; + u64 purr; + u64 spurr; + u64 dscr; + u64 ciabr; + + u64 mmcra; + u32 mmcr0; + u32 mmcr1; + u64 mmcr2; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 amor; + u64 uamor; +}; + +static unsigned long power9_idle_stop(unsigned long psscr) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; + unsigned long srr1; + unsigned long pls; + unsigned long mmcr0 = 0; + unsigned long mmcra = 0; + struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; + + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { + local_paca->requested_psscr = psscr; + /* order setting requested_psscr vs testing dont_stop */ + smp_mb(); + if (atomic_read(&local_paca->dont_stop)) { + local_paca->requested_psscr = 0; + return 0; + } + } +#endif + + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + /* + * POWER9 DD2 can incorrectly set PMAO when waking up + * after a state-loss idle. Saving and restoring MMCR0 + * over idle is a workaround. + */ + mmcr0 = mfspr(SPRN_MMCR0); + } + + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { + sprs.lpcr = mfspr(SPRN_LPCR); + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + sprs.pid = mfspr(SPRN_PID); + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.ciabr = mfspr(SPRN_CIABR); + + sprs.mmcra = mfspr(SPRN_MMCRA); + sprs.mmcr0 = mfspr(SPRN_MMCR0); + sprs.mmcr1 = mfspr(SPRN_MMCR1); + sprs.mmcr2 = mfspr(SPRN_MMCR2); + + sprs.ptcr = mfspr(SPRN_PTCR); + sprs.rpr = mfspr(SPRN_RPR); + sprs.tscr = mfspr(SPRN_TSCR); + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + sprs.ldbar = mfspr(SPRN_LDBAR); + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.uamor = mfspr(SPRN_UAMOR); + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->requested_psscr = 0; +#endif + + psscr = mfspr(SPRN_PSSCR); + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + /* + * We don't need an isync after the mtsprs here because the + * upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, ~0); + mtspr(SPRN_UAMOR, sprs.uamor); + + /* + * Workaround for POWER9 DD2.0, if we lost resources, the ERAT + * might have been corrupted and needs flushing. We also need + * to reload MMCR0 (see mmcr0 comment above). + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); + mtspr(SPRN_MMCR0, mmcr0); + } + + /* + * DD2.2 and earlier need to set then clear bit 60 in MMCRA + * to ensure the PMU starts running. + */ + mmcra = mfspr(SPRN_MMCRA); + mmcra |= PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + mmcra &= ~PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER9, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < deep_spr_loss_state)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + mtspr(SPRN_PTCR, sprs.ptcr); + mtspr(SPRN_RPR, sprs.rpr); + mtspr(SPRN_TSCR, sprs.tscr); + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + mtspr(SPRN_PID, sprs.pid); + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_CIABR, sprs.ciabr); + + mtspr(SPRN_MMCRA, sprs.mmcra); + mtspr(SPRN_MMCR0, sprs.mmcr0); + mtspr(SPRN_MMCR1, sprs.mmcr1); + mtspr(SPRN_MMCR2, sprs.mmcr2); + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + mtspr(SPRN_LDBAR, sprs.ldbar); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * This is used in working around bugs in thread reconfiguration + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional + * memory and the way that XER[SO] is checkpointed. + * This function forces the core into SMT4 in order by asking + * all other threads not to stop, and sending a message to any + * that are in a stop state. + * Must be called with preemption disabled. + */ +void pnv_power9_force_smt4_catch(void) +{ + int cpu, cpu0, thr; + int awake_threads = 1; /* this thread is awake */ + int poke_threads = 0; + int need_awake = threads_per_core; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); + } + /* order setting dont_stop vs testing requested_psscr */ + smp_mb(); + for (thr = 0; thr < threads_per_core; ++thr) { + if (!paca_ptrs[cpu0+thr]->requested_psscr) + ++awake_threads; + else + poke_threads |= (1 << thr); + } + + /* If at least 3 threads are awake, the core is in SMT4 already */ + if (awake_threads < need_awake) { + /* We have to wake some threads; we'll use msgsnd */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (poke_threads & (1 << thr)) { + ppc_msgsnd_sync(); + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, + paca_ptrs[cpu0+thr]->hw_cpu_id); + } + } + /* now spin until at least 3 threads are awake */ + do { + for (thr = 0; thr < threads_per_core; ++thr) { + if ((poke_threads & (1 << thr)) && + !paca_ptrs[cpu0+thr]->requested_psscr) { + ++awake_threads; + poke_threads &= ~(1 << thr); + } + } + } while (awake_threads < need_awake); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); + +void pnv_power9_force_smt4_release(void) +{ + int cpu, cpu0, thr; + + cpu = smp_processor_id(); + cpu0 = cpu & ~(threads_per_core - 1); + + /* clear all the dont_stop flags */ + for (thr = 0; thr < threads_per_core; ++thr) { + if (cpu != cpu0 + thr) + atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); + } +} +EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + +struct p10_sprs { + /* + * SPRs that get lost in shallow states: + * + * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 + * isa300 idle routines restore CR, LR. + * CTR is volatile + * idle thread doesn't use FP or VEC + * kernel doesn't use TAR + * HSPRG1 is only live in HV interrupt entry + * SPRG2 is only live in KVM guests, KVM handles it. + */ +}; + +static unsigned long power10_idle_stop(unsigned long psscr) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; + unsigned long srr1; + unsigned long pls; +// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; + + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ + if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { + /* XXX: save SPRs for deep state loss here. */ + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + + psscr = mfspr(SPRN_PSSCR); + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER10, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < deep_spr_loss_state)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* XXX: restore per-core SPRs here */ + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* XXX: restore per-thread SPRs here */ + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long arch300_offline_stop(unsigned long psscr) +{ + unsigned long srr1; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr); + else + srr1 = power9_idle_stop(psscr); + + return srr1; +} +#endif + +void arch300_idle_type(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask) +{ + unsigned long psscr; + unsigned long srr1; + + if (!prep_irq_for_idle_irqsoff()) + return; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + __ppc64_runlatch_off(); + if (cpu_has_feature(CPU_FTR_ARCH_31)) + srr1 = power10_idle_stop(psscr); + else + srr1 = power9_idle_stop(psscr); + __ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + + irq_set_pending_from_srr1(srr1); +} + +/* + * Used for ppc_md.power_save which needs a function with no parameters + */ +static void arch300_idle(void) +{ + arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); +} + +#ifdef CONFIG_HOTPLUG_CPU + +void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) +{ + u64 pir = get_hard_smp_processor_id(cpu); + + mtspr(SPRN_LPCR, lpcr_val); + + /* + * Program the LPCR via stop-api only if the deepest stop state + * can lose hypervisor context. + */ + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) + opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); +} + +/* + * pnv_cpu_offline: A function that puts the CPU into the deepest + * available platform idle state on a CPU-Offline. + * interrupts hard disabled and no lazy irq pending. + */ +unsigned long pnv_cpu_offline(unsigned int cpu) +{ + unsigned long srr1; + + __ppc64_runlatch_off(); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { + unsigned long psscr; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | + pnv_deepest_stop_psscr_val; + srr1 = arch300_offline_stop(psscr); + } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { + srr1 = power7_offline(); + } else { + /* This is the fallback method. We emulate snooze */ + while (!generic_check_cpu_restart(cpu)) { + HMT_low(); + HMT_very_low(); + } + srr1 = 0; + HMT_medium(); + } + + __ppc64_runlatch_on(); + + return srr1; +} +#endif + +/* + * Power ISA 3.0 idle initialization. + * + * POWER ISA 3.0 defines a new SPR Processor stop Status and Control + * Register (PSSCR) to control idle behavior. + * + * PSSCR layout: + * ---------------------------------------------------------- + * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | + * ---------------------------------------------------------- + * 0 4 41 42 43 44 48 54 56 60 + * + * PSSCR key fields: + * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the + * lowest power-saving state the thread entered since stop instruction was + * last executed. + * + * Bit 41 - Status Disable(SD) + * 0 - Shows PLS entries + * 1 - PLS entries are all 0 + * + * Bit 42 - Enable State Loss + * 0 - No state is lost irrespective of other fields + * 1 - Allows state loss + * + * Bit 43 - Exit Criterion + * 0 - Exit from power-save mode on any interrupt + * 1 - Exit from power-save mode controlled by LPCR's PECE bits + * + * Bits 44:47 - Power-Saving Level Limit + * This limits the power-saving level that can be entered into. + * + * Bits 60:63 - Requested Level + * Used to specify which power-saving level must be entered on executing + * stop instruction + */ + +int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) +{ + int err = 0; + + /* + * psscr_mask == 0xf indicates an older firmware. + * Set remaining fields of psscr to the default values. + * See NOTE above definition of PSSCR_HV_DEFAULT_VAL + */ + if (*psscr_mask == 0xf) { + *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; + *psscr_mask = PSSCR_HV_DEFAULT_MASK; + return err; + } + + /* + * New firmware is expected to set the psscr_val bits correctly. + * Validate that the following invariants are correctly maintained by + * the new firmware. + * - ESL bit value matches the EC bit value. + * - ESL bit is set for all the deep stop states. + */ + if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { + err = ERR_EC_ESL_MISMATCH; + } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && + GET_PSSCR_ESL(*psscr_val) == 0) { + err = ERR_DEEP_STATE_ESL_MISMATCH; + } + + return err; +} + +/* + * pnv_arch300_idle_init: Initializes the default idle state, first + * deep idle state and deepest idle state on + * ISA 3.0 CPUs. + * + * @np: /ibm,opal/power-mgt device node + * @flags: cpu-idle-state-flags array + * @dt_idle_states: Number of idle state entries + * Returns 0 on success + */ +static void __init pnv_arch300_idle_init(void) +{ + u64 max_residency_ns = 0; + int i; + + /* stop is not really architected, we only have p9,p10 drivers */ + if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) + return; + + /* + * pnv_deepest_stop_{val,mask} should be set to values corresponding to + * the deepest stop state. + * + * pnv_default_stop_{val,mask} should be set to values corresponding to + * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. + */ + pnv_first_tb_loss_level = MAX_STOP_STATE + 1; + deep_spr_loss_state = MAX_STOP_STATE + 1; + for (i = 0; i < nr_pnv_idle_states; i++) { + int err; + struct pnv_idle_states_t *state = &pnv_idle_states[i]; + u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; + + /* No deep loss driver implemented for POWER10 yet */ + if (pvr_version_is(PVR_POWER10) && + state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) + continue; + + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (pnv_first_tb_loss_level > psscr_rl)) + pnv_first_tb_loss_level = psscr_rl; + + if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; + + /* + * The idle code does not deal with TB loss occurring + * in a shallower state than SPR loss, so force it to + * behave like SPRs are lost if TB is lost. POWER9 would + * never encounter this, but a POWER8 core would if it + * implemented the stop instruction. So this is for forward + * compatibility. + */ + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (deep_spr_loss_state > psscr_rl)) + deep_spr_loss_state = psscr_rl; + + err = validate_psscr_val_mask(&state->psscr_val, + &state->psscr_mask, + state->flags); + if (err) { + report_invalid_psscr_val(state->psscr_val, err); + continue; + } + + state->valid = true; + + if (max_residency_ns < state->residency_ns) { + max_residency_ns = state->residency_ns; + pnv_deepest_stop_psscr_val = state->psscr_val; + pnv_deepest_stop_psscr_mask = state->psscr_mask; + pnv_deepest_stop_flag = state->flags; + deepest_stop_found = true; + } + + if (!default_stop_found && + (state->flags & OPAL_PM_STOP_INST_FAST)) { + pnv_default_stop_val = state->psscr_val; + pnv_default_stop_mask = state->psscr_mask; + default_stop_found = true; + WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); + } + } + + if (unlikely(!default_stop_found)) { + pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); + } else { + ppc_md.power_save = arch300_idle; + pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", + pnv_default_stop_val, pnv_default_stop_mask); + } + + if (unlikely(!deepest_stop_found)) { + pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait"); + } else { + pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", + pnv_deepest_stop_psscr_val, + pnv_deepest_stop_psscr_mask); + } + + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", + deep_spr_loss_state); + + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", + pnv_first_tb_loss_level); +} + +static void __init pnv_disable_deep_states(void) +{ + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. + */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } +} + +/* + * Probe device tree for supported idle states + */ +static void __init pnv_probe_idle_states(void) +{ + int i; + + if (nr_pnv_idle_states < 0) { + pr_warn("cpuidle-powernv: no idle states found in the DT\n"); + return; + } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pnv_arch300_idle_init(); + + for (i = 0; i < nr_pnv_idle_states; i++) + supported_cpuidle_states |= pnv_idle_states[i].flags; +} + +/* + * This function parses device-tree and populates all the information + * into pnv_idle_states structure. It also sets up nr_pnv_idle_states + * which is the number of cpuidle states discovered through device-tree. + */ + +static int __init pnv_parse_cpuidle_dt(void) +{ + struct device_node *np; + int nr_idle_states, i; + int rc = 0; + u32 *temp_u32; + u64 *temp_u64; + const char **temp_string; + + np = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!np) { + pr_warn("opal: PowerMgmt Node not found\n"); + return -ENODEV; + } + nr_idle_states = of_property_count_u32_elems(np, + "ibm,cpu-idle-state-flags"); + + pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), + GFP_KERNEL); + temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); + temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); + temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); + + if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { + pr_err("Could not allocate memory for dt parsing\n"); + rc = -ENOMEM; + goto out; + } + + /* Read flags */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].flags = temp_u32[i]; + + /* Read latencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].latency_ns = temp_u32[i]; + + /* Read residencies */ + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", + temp_u32, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].residency_ns = temp_u32[i]; + + /* For power9 and later */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Read pm_crtl_val */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_val = temp_u64[i]; + + /* Read pm_crtl_mask */ + if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", + temp_u64, nr_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + pnv_idle_states[i].psscr_mask = temp_u64[i]; + } + + /* + * power8 specific properties ibm,cpu-idle-state-pmicr-mask and + * ibm,cpu-idle-state-pmicr-val were never used and there is no + * plan to use it in near future. Hence, not parsing these properties + */ + + if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", + temp_string, nr_idle_states) < 0) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); + rc = -EINVAL; + goto out; + } + for (i = 0; i < nr_idle_states; i++) + strscpy(pnv_idle_states[i].name, temp_string[i], + PNV_IDLE_NAME_LEN); + nr_pnv_idle_states = nr_idle_states; + rc = 0; +out: + kfree(temp_u32); + kfree(temp_u64); + kfree(temp_string); + of_node_put(np); + return rc; +} + +static int __init pnv_init_idle_states(void) +{ + int cpu; + int rc = 0; + + /* Set up PACA fields */ + for_each_present_cpu(cpu) { + struct paca_struct *p = paca_ptrs[cpu]; + + p->idle_state = 0; + if (cpu == cpu_first_thread_sibling(cpu)) + p->idle_state = (1 << threads_per_core) - 1; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* P7/P8 nap */ + p->thread_idle_state = PNV_THREAD_RUNNING; + } else if (pvr_version_is(PVR_POWER9)) { + /* P9 stop workarounds */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + p->requested_psscr = 0; + atomic_set(&p->dont_stop, 0); +#endif + } + } + + /* In case we error out nr_pnv_idle_states will be zero */ + nr_pnv_idle_states = 0; + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + goto out; + rc = pnv_parse_cpuidle_dt(); + if (rc) + return rc; + pnv_probe_idle_states(); + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + power7_fastsleep_workaround_entry = false; + power7_fastsleep_workaround_exit = false; + } else { + struct device *dev_root; + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be + * applied. + */ + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + device_create_file(dev_root, + &dev_attr_fastsleep_workaround_applyonce); + put_device(dev_root); + } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { + ppc_md.power_save = power7_idle; + power7_offline_type = PNV_THREAD_NAP; + } + + if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && + (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) + power7_offline_type = PNV_THREAD_WINKLE; + else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || + (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + power7_offline_type = PNV_THREAD_SLEEP; + } + + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + if (pnv_save_sprs_for_deep_states()) + pnv_disable_deep_states(); + } + +out: + return 0; +} +machine_subsys_initcall(powernv, pnv_init_idle_states); diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c new file mode 100644 index 0000000000..877720c645 --- /dev/null +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) IBM Corporation, 2014, 2017 + * Anton Blanchard, Rashmica Gupta. + */ + +#define pr_fmt(fmt) "memtrace: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This enables us to keep track of the memory removed from each node. */ +struct memtrace_entry { + void *mem; + u64 start; + u64 size; + u32 nid; + struct dentry *dir; + char name[16]; +}; + +static DEFINE_MUTEX(memtrace_mutex); +static u64 memtrace_size; + +static struct memtrace_entry *memtrace_array; +static unsigned int memtrace_array_nr; + + +static ssize_t memtrace_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct memtrace_entry *ent = filp->private_data; + + return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); +} + +static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct memtrace_entry *ent = filp->private_data; + + if (ent->size < vma->vm_end - vma->vm_start) + return -EINVAL; + + if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +} + +static const struct file_operations memtrace_fops = { + .llseek = default_llseek, + .read = memtrace_read, + .open = simple_open, + .mmap = memtrace_mmap, +}; + +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, i + chunk)); + cond_resched(); + } +} + +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long pfn; + + /* As HIGHMEM does not apply, use clear_page() directly. */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); + } + /* + * Before we go ahead and use this range as cache inhibited range + * flush the cache. + */ + flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn), + (unsigned long)pfn_to_kaddr(start_pfn + nr_pages), + FLUSH_CHUNK_SIZE); +} + +static u64 memtrace_alloc_node(u32 nid, u64 size) +{ + const unsigned long nr_pages = PHYS_PFN(size); + unsigned long pfn, start_pfn; + struct page *page; + + /* + * Trace memory needs to be aligned to the size, which is guaranteed + * by alloc_contig_pages(). + */ + page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE | + __GFP_NOWARN, nid, NULL); + if (!page) + return 0; + start_pfn = page_to_pfn(page); + + /* + * Clear the range while we still have a linear mapping. + * + * TODO: use __GFP_ZERO with alloc_contig_pages() once supported. + */ + memtrace_clear_range(start_pfn, nr_pages); + + /* + * Set pages PageOffline(), to indicate that nobody (e.g., hibernation, + * dumping, ...) should be touching these pages. + */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __SetPageOffline(pfn_to_page(pfn)); + + arch_remove_linear_mapping(PFN_PHYS(start_pfn), size); + + return PFN_PHYS(start_pfn); +} + +static int memtrace_init_regions_runtime(u64 size) +{ + u32 nid; + u64 m; + + memtrace_array = kcalloc(num_online_nodes(), + sizeof(struct memtrace_entry), GFP_KERNEL); + if (!memtrace_array) { + pr_err("Failed to allocate memtrace_array\n"); + return -EINVAL; + } + + for_each_online_node(nid) { + m = memtrace_alloc_node(nid, size); + + /* + * A node might not have any local memory, so warn but + * continue on. + */ + if (!m) { + pr_err("Failed to allocate trace memory on node %d\n", nid); + continue; + } + + pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); + + memtrace_array[memtrace_array_nr].start = m; + memtrace_array[memtrace_array_nr].size = size; + memtrace_array[memtrace_array_nr].nid = nid; + memtrace_array_nr++; + } + + return 0; +} + +static struct dentry *memtrace_debugfs_dir; + +static int memtrace_init_debugfs(void) +{ + int ret = 0; + int i; + + for (i = 0; i < memtrace_array_nr; i++) { + struct dentry *dir; + struct memtrace_entry *ent = &memtrace_array[i]; + + ent->mem = ioremap(ent->start, ent->size); + /* Warn but continue on */ + if (!ent->mem) { + pr_err("Failed to map trace memory at 0x%llx\n", + ent->start); + ret = -1; + continue; + } + + snprintf(ent->name, 16, "%08x", ent->nid); + dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); + + ent->dir = dir; + debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops); + debugfs_create_x64("start", 0400, dir, &ent->start); + debugfs_create_x64("size", 0400, dir, &ent->size); + } + + return ret; +} + +static int memtrace_free(int nid, u64 start, u64 size) +{ + struct mhp_params params = { .pgprot = PAGE_KERNEL }; + const unsigned long nr_pages = PHYS_PFN(size); + const unsigned long start_pfn = PHYS_PFN(start); + unsigned long pfn; + int ret; + + ret = arch_create_linear_mapping(nid, start, size, ¶ms); + if (ret) + return ret; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) + __ClearPageOffline(pfn_to_page(pfn)); + + free_contig_range(start_pfn, nr_pages); + return 0; +} + +/* + * Iterate through the chunks of memory we allocated and attempt to expose + * them back to the kernel. + */ +static int memtrace_free_regions(void) +{ + int i, ret = 0; + struct memtrace_entry *ent; + + for (i = memtrace_array_nr - 1; i >= 0; i--) { + ent = &memtrace_array[i]; + + /* We have freed this chunk previously */ + if (ent->nid == NUMA_NO_NODE) + continue; + + /* Remove from io mappings */ + if (ent->mem) { + iounmap(ent->mem); + ent->mem = 0; + } + + if (memtrace_free(ent->nid, ent->start, ent->size)) { + pr_err("Failed to free trace memory on node %d\n", + ent->nid); + ret += 1; + continue; + } + + /* + * Memory was freed successfully so clean up references to it + * so on reentry we can tell that this chunk was freed. + */ + debugfs_remove_recursive(ent->dir); + pr_info("Freed trace memory back on node %d\n", ent->nid); + ent->size = ent->start = ent->nid = NUMA_NO_NODE; + } + if (ret) + return ret; + + /* If all chunks of memory were freed successfully, reset globals */ + kfree(memtrace_array); + memtrace_array = NULL; + memtrace_size = 0; + memtrace_array_nr = 0; + return 0; +} + +static int memtrace_enable_set(void *data, u64 val) +{ + int rc = -EAGAIN; + u64 bytes; + + /* + * Don't attempt to do anything if size isn't aligned to a memory + * block or equal to zero. + */ + bytes = memory_block_size_bytes(); + if (val & (bytes - 1)) { + pr_err("Value must be aligned with 0x%llx\n", bytes); + return -EINVAL; + } + + mutex_lock(&memtrace_mutex); + + /* Free all previously allocated memory. */ + if (memtrace_size && memtrace_free_regions()) + goto out_unlock; + + if (!val) { + rc = 0; + goto out_unlock; + } + + /* Allocate memory. */ + if (memtrace_init_regions_runtime(val)) + goto out_unlock; + + if (memtrace_init_debugfs()) + goto out_unlock; + + memtrace_size = val; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; +} + +static int memtrace_enable_get(void *data, u64 *val) +{ + *val = memtrace_size; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, + memtrace_enable_set, "0x%016llx\n"); + +static int memtrace_init(void) +{ + memtrace_debugfs_dir = debugfs_create_dir("memtrace", + arch_debugfs_dir); + + debugfs_create_file("enable", 0600, memtrace_debugfs_dir, + NULL, &memtrace_init_fops); + + return 0; +} +machine_device_initcall(powernv, memtrace_init); diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c new file mode 100644 index 0000000000..64a9c7125c --- /dev/null +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright 2017 IBM Corp. +#include +#include +#include +#include "pci.h" + +#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full +#define PNV_OCXL_ACTAG_MAX 64 +/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ +#define PNV_OCXL_PASID_BITS 15 +#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) + +#define AFU_PRESENT (1 << 31) +#define AFU_INDEX_MASK 0x3F000000 +#define AFU_INDEX_SHIFT 24 +#define ACTAG_MASK 0xFFF + + +struct actag_range { + u16 start; + u16 count; +}; + +struct npu_link { + struct list_head list; + int domain; + int bus; + int dev; + u16 fn_desired_actags[8]; + struct actag_range fn_actags[8]; + bool assignment_done; +}; +static struct list_head links_list = LIST_HEAD_INIT(links_list); +static DEFINE_MUTEX(links_list_lock); + + +/* + * opencapi actags handling: + * + * When sending commands, the opencapi device references the memory + * context it's targeting with an 'actag', which is really an alias + * for a (BDF, pasid) combination. When it receives a command, the NPU + * must do a lookup of the actag to identify the memory context. The + * hardware supports a finite number of actags per link (64 for + * POWER9). + * + * The device can carry multiple functions, and each function can have + * multiple AFUs. Each AFU advertises in its config space the number + * of desired actags. The host must configure in the config space of + * the AFU how many actags the AFU is really allowed to use (which can + * be less than what the AFU desires). + * + * When a PCI function is probed by the driver, it has no visibility + * about the other PCI functions and how many actags they'd like, + * which makes it impossible to distribute actags fairly among AFUs. + * + * Unfortunately, the only way to know how many actags a function + * desires is by looking at the data for each AFU in the config space + * and add them up. Similarly, the only way to know how many actags + * all the functions of the physical device desire is by adding the + * previously computed function counts. Then we can match that against + * what the hardware supports. + * + * To get a comprehensive view, we use a 'pci fixup': at the end of + * PCI enumeration, each function counts how many actags its AFUs + * desire and we save it in a 'npu_link' structure, shared between all + * the PCI functions of a same device. Therefore, when the first + * function is probed by the driver, we can get an idea of the total + * count of desired actags for the device, and assign the actags to + * the AFUs, by pro-rating if needed. + */ + +static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) +{ + int vsec = pos; + u16 vendor, id; + + while ((vsec = pci_find_next_ext_capability(dev, vsec, + OCXL_EXT_CAP_ID_DVSEC))) { + pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, + &vendor); + pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); + if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) + return vsec; + } + return 0; +} + +static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) +{ + int vsec = 0; + u8 idx; + + while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, + vsec))) { + pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, + &idx); + if (idx == afu_idx) + return vsec; + } + return 0; +} + +static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) +{ + int pos; + u32 val; + + pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, + OCXL_DVSEC_FUNC_ID); + if (!pos) + return -ESRCH; + + pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); + if (val & AFU_PRESENT) + *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; + else + *afu_idx = -1; + return 0; +} + +static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) +{ + int pos; + u16 actag_sup; + + pos = find_dvsec_afu_ctrl(dev, afu_idx); + if (!pos) + return -ESRCH; + + pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, + &actag_sup); + *actag = actag_sup & ACTAG_MASK; + return 0; +} + +static struct npu_link *find_link(struct pci_dev *dev) +{ + struct npu_link *link; + + list_for_each_entry(link, &links_list, list) { + /* The functions of a device all share the same link */ + if (link->domain == pci_domain_nr(dev->bus) && + link->bus == dev->bus->number && + link->dev == PCI_SLOT(dev->devfn)) { + return link; + } + } + + /* link doesn't exist yet. Allocate one */ + link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); + if (!link) + return NULL; + link->domain = pci_domain_nr(dev->bus); + link->bus = dev->bus->number; + link->dev = PCI_SLOT(dev->devfn); + list_add(&link->list, &links_list); + return link; +} + +static void pnv_ocxl_fixup_actag(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct npu_link *link; + int rc, afu_idx = -1, i, actag; + + if (!machine_is(powernv)) + return; + + if (phb->type != PNV_PHB_NPU_OCAPI) + return; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_warn(&dev->dev, "couldn't update actag information\n"); + mutex_unlock(&links_list_lock); + return; + } + + /* + * Check how many actags are desired for the AFUs under that + * function and add it to the count for the link + */ + rc = get_max_afu_index(dev, &afu_idx); + if (rc) { + /* Most likely an invalid config space */ + dev_dbg(&dev->dev, "couldn't find AFU information\n"); + afu_idx = -1; + } + + link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; + for (i = 0; i <= afu_idx; i++) { + /* + * AFU index 'holes' are allowed. So don't fail if we + * can't read the actag info for an index + */ + rc = get_actag_count(dev, i, &actag); + if (rc) + continue; + link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; + } + dev_dbg(&dev->dev, "total actags for function: %d\n", + link->fn_desired_actags[PCI_FUNC(dev->devfn)]); + + mutex_unlock(&links_list_lock); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); + +static u16 assign_fn_actags(u16 desired, u16 total) +{ + u16 count; + + if (total <= PNV_OCXL_ACTAG_MAX) + count = desired; + else + count = PNV_OCXL_ACTAG_MAX * desired / total; + + return count; +} + +static void assign_actags(struct npu_link *link) +{ + u16 actag_count, range_start = 0, total_desired = 0; + int i; + + for (i = 0; i < 8; i++) + total_desired += link->fn_desired_actags[i]; + + for (i = 0; i < 8; i++) { + if (link->fn_desired_actags[i]) { + actag_count = assign_fn_actags( + link->fn_desired_actags[i], + total_desired); + link->fn_actags[i].start = range_start; + link->fn_actags[i].count = actag_count; + range_start += actag_count; + WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); + } + pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", + link->domain, link->bus, link->dev, i, + link->fn_actags[i].start, link->fn_actags[i].count, + link->fn_desired_actags[i]); + } + link->assignment_done = true; +} + +int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, + u16 *supported) +{ + struct npu_link *link; + + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + /* + * On p9, we only have 64 actags per link, so they must be + * shared by all the functions of the same adapter. We counted + * the desired actag counts during PCI enumeration, so that we + * can allocate a pro-rated number of actags to each function. + */ + if (!link->assignment_done) + assign_actags(link); + + *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; + *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; + *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; + + mutex_unlock(&links_list_lock); + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); + +int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) +{ + struct npu_link *link; + int i, rc = -EINVAL; + + /* + * The number of PASIDs (process address space ID) which can + * be used by a function depends on how many functions exist + * on the device. The NPU needs to be configured to know how + * many bits are available to PASIDs and how many are to be + * used by the function BDF identifier. + * + * We only support one AFU-carrying function for now. + */ + mutex_lock(&links_list_lock); + + link = find_link(dev); + if (!link) { + dev_err(&dev->dev, "actag information not found\n"); + mutex_unlock(&links_list_lock); + return -ENODEV; + } + + for (i = 0; i < 8; i++) + if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { + *count = PNV_OCXL_PASID_MAX; + rc = 0; + break; + } + + mutex_unlock(&links_list_lock); + dev_dbg(&dev->dev, "%d PASIDs available for function\n", + rc ? 0 : *count); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); + +static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) +{ + int shift, idx; + + WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); + idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; + shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); + buf[idx] |= rate << shift; +} + +int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, + char *rate_buf, int rate_buf_size) +{ + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + /* + * The TL capabilities are a characteristic of the NPU, so + * we go with hard-coded values. + * + * The receiving rate of each template is encoded on 4 bits. + * + * On P9: + * - templates 0 -> 3 are supported + * - templates 0, 1 and 3 have a 0 receiving rate + * - template 2 has receiving rate of 1 (extra cycle) + */ + memset(rate_buf, 0, rate_buf_size); + set_templ_rate(2, 1, rate_buf); + *cap = PNV_OCXL_TL_P9_RECV_CAP; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); + +int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, + uint64_t rate_buf_phys, int rate_buf_size) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int rc; + + if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) + return -EINVAL; + + rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, + rate_buf_phys, rate_buf_size); + if (rc) { + dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); + +int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) +{ + int rc; + + rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); + if (rc) { + dev_err(&dev->dev, + "Can't get translation interrupt for device\n"); + return rc; + } + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); + +void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, + void __iomem *tfc, void __iomem *pe_handle) +{ + iounmap(dsisr); + iounmap(dar); + iounmap(tfc); + iounmap(pe_handle); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); + +int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, + void __iomem **dar, void __iomem **tfc, + void __iomem **pe_handle) +{ + u64 reg; + int i, j, rc = 0; + void __iomem *regs[4]; + + /* + * opal stores the mmio addresses of the DSISR, DAR, TFC and + * PE_HANDLE registers in a device tree property, in that + * order + */ + for (i = 0; i < 4; i++) { + rc = of_property_read_u64_index(dev->dev.of_node, + "ibm,opal-xsl-mmio", i, ®); + if (rc) + break; + regs[i] = ioremap(reg, 8); + if (!regs[i]) { + rc = -EINVAL; + break; + } + } + if (rc) { + dev_err(&dev->dev, "Can't map translation mmio registers\n"); + for (j = i - 1; j >= 0; j--) + iounmap(regs[j]); + } else { + *dsisr = regs[0]; + *dar = regs[1]; + *tfc = regs[2]; + *pe_handle = regs[3]; + } + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); + +struct spa_data { + u64 phb_opal_id; + u32 bdfn; +}; + +int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, + void **platform_data) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct spa_data *data; + u32 bdfn; + int rc; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + bdfn = pci_dev_id(dev); + rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), + PE_mask); + if (rc) { + dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); + kfree(data); + return rc; + } + data->phb_opal_id = phb->opal_id; + data->bdfn = bdfn; + *platform_data = (void *) data; + return 0; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); + +void pnv_ocxl_spa_release(void *platform_data) +{ + struct spa_data *data = (struct spa_data *) platform_data; + int rc; + + rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); + WARN_ON(rc); + kfree(data); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); + +int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) +{ + struct spa_data *data = (struct spa_data *) platform_data; + + return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); + +int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, + uint64_t lpcr, void __iomem **arva) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + u64 mmio_atsd; + int rc; + + /* ATSD physical address. + * ATSD LAUNCH register: write access initiates a shoot down to + * initiate the TLB Invalidate command. + */ + rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", + 0, &mmio_atsd); + if (rc) { + dev_info(&dev->dev, "No available ATSD found\n"); + return rc; + } + + /* Assign a register set to a Logical Partition and MMIO ATSD + * LPARID register to the required value. + */ + rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), + lparid, lpcr); + if (rc) { + dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); + return rc; + } + + *arva = ioremap(mmio_atsd, 24); + if (!(*arva)) { + dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); + rc = -ENOMEM; + } + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); + +void pnv_ocxl_unmap_lpar(void __iomem *arva) +{ + iounmap(arva); +} +EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); + +void pnv_ocxl_tlb_invalidate(void __iomem *arva, + unsigned long pid, + unsigned long addr, + unsigned long page_size) +{ + unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); + u64 val = 0ull; + int pend; + u8 size; + + if (!(arva)) + return; + + if (addr) { + /* load Abbreviated Virtual Address register with + * the necessary value + */ + val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); + out_be64(arva + PNV_OCXL_ATSD_AVA, val); + } + + /* Write access initiates a shoot down to initiate the + * TLB Invalidate command + */ + val = PNV_OCXL_ATSD_LNCH_R; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); + if (addr) + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); + else { + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); + val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; + } + val |= PNV_OCXL_ATSD_LNCH_PRS; + /* Actual Page Size to be invalidated + * 000 4KB + * 101 64KB + * 001 2MB + * 010 1GB + */ + size = 0b101; + if (page_size == 0x1000) + size = 0b000; + if (page_size == 0x200000) + size = 0b001; + if (page_size == 0x40000000) + size = 0b010; + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); + val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); + out_be64(arva + PNV_OCXL_ATSD_LNCH, val); + + /* Poll the ATSD status register to determine when the + * TLB Invalidate has been completed. + */ + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + + while (pend) { + if (time_after_eq(jiffies, timeout)) { + pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", + __func__, val, pid); + return; + } + cpu_relax(); + val = in_be64(arva + PNV_OCXL_ATSD_STAT); + pend = val >> 63; + } +} +EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c new file mode 100644 index 0000000000..c094fdf582 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL asynchronous completion interfaces + * + * Copyright 2013-2017 IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum opal_async_token_state { + ASYNC_TOKEN_UNALLOCATED = 0, + ASYNC_TOKEN_ALLOCATED, + ASYNC_TOKEN_DISPATCHED, + ASYNC_TOKEN_ABANDONED, + ASYNC_TOKEN_COMPLETED +}; + +struct opal_async_token { + enum opal_async_token_state state; + struct opal_msg response; +}; + +static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait); +static DEFINE_SPINLOCK(opal_async_comp_lock); +static struct semaphore opal_async_sem; +static unsigned int opal_max_async_tokens; +static struct opal_async_token *opal_async_tokens; + +static int __opal_async_get_token(void) +{ + unsigned long flags; + int i, token = -EBUSY; + + spin_lock_irqsave(&opal_async_comp_lock, flags); + + for (i = 0; i < opal_max_async_tokens; i++) { + if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) { + opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED; + token = i; + break; + } + } + + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + return token; +} + +/* + * Note: If the returned token is used in an opal call and opal returns + * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or + * opal_async_wait_response_interruptible() at least once before calling another + * opal_async_* function + */ +int opal_async_get_token_interruptible(void) +{ + int token; + + /* Wait until a token is available */ + if (down_interruptible(&opal_async_sem)) + return -ERESTARTSYS; + + token = __opal_async_get_token(); + if (token < 0) + up(&opal_async_sem); + + return token; +} +EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible); + +static int __opal_async_release_token(int token) +{ + unsigned long flags; + int rc; + + if (token < 0 || token >= opal_max_async_tokens) { + pr_err("%s: Passed token is out of range, token %d\n", + __func__, token); + return -EINVAL; + } + + spin_lock_irqsave(&opal_async_comp_lock, flags); + switch (opal_async_tokens[token].state) { + case ASYNC_TOKEN_COMPLETED: + case ASYNC_TOKEN_ALLOCATED: + opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED; + rc = 0; + break; + /* + * DISPATCHED and ABANDONED tokens must wait for OPAL to respond. + * Mark a DISPATCHED token as ABANDONED so that the response handling + * code knows no one cares and that it can free it then. + */ + case ASYNC_TOKEN_DISPATCHED: + opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED; + fallthrough; + default: + rc = 1; + } + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + + return rc; +} + +int opal_async_release_token(int token) +{ + int ret; + + ret = __opal_async_release_token(token); + if (!ret) + up(&opal_async_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(opal_async_release_token); + +int opal_async_wait_response(uint64_t token, struct opal_msg *msg) +{ + if (token >= opal_max_async_tokens) { + pr_err("%s: Invalid token passed\n", __func__); + return -EINVAL; + } + + if (!msg) { + pr_err("%s: Invalid message pointer passed\n", __func__); + return -EINVAL; + } + + /* + * There is no need to mark the token as dispatched, wait_event() + * will block until the token completes. + * + * Wakeup the poller before we wait for events to speed things + * up on platforms or simulators where the interrupts aren't + * functional. + */ + opal_wake_poller(); + wait_event(opal_async_wait, opal_async_tokens[token].state + == ASYNC_TOKEN_COMPLETED); + memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg)); + + return 0; +} +EXPORT_SYMBOL_GPL(opal_async_wait_response); + +int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg) +{ + unsigned long flags; + int ret; + + if (token >= opal_max_async_tokens) { + pr_err("%s: Invalid token passed\n", __func__); + return -EINVAL; + } + + if (!msg) { + pr_err("%s: Invalid message pointer passed\n", __func__); + return -EINVAL; + } + + /* + * The first time this gets called we mark the token as DISPATCHED + * so that if wait_event_interruptible() returns not zero and the + * caller frees the token, we know not to actually free the token + * until the response comes. + * + * Only change if the token is ALLOCATED - it may have been + * completed even before the caller gets around to calling this + * the first time. + * + * There is also a dirty great comment at the token allocation + * function that if the opal call returns OPAL_ASYNC_COMPLETION to + * the caller then the caller *must* call this or the not + * interruptible version before doing anything else with the + * token. + */ + if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) { + spin_lock_irqsave(&opal_async_comp_lock, flags); + if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) + opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED; + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + } + + /* + * Wakeup the poller before we wait for events to speed things + * up on platforms or simulators where the interrupts aren't + * functional. + */ + opal_wake_poller(); + ret = wait_event_interruptible(opal_async_wait, + opal_async_tokens[token].state == + ASYNC_TOKEN_COMPLETED); + if (!ret) + memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg)); + + return ret; +} +EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible); + +/* Called from interrupt context */ +static int opal_async_comp_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + struct opal_msg *comp_msg = msg; + enum opal_async_token_state state; + unsigned long flags; + uint64_t token; + + if (msg_type != OPAL_MSG_ASYNC_COMP) + return 0; + + token = be64_to_cpu(comp_msg->params[0]); + spin_lock_irqsave(&opal_async_comp_lock, flags); + state = opal_async_tokens[token].state; + opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED; + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + + if (state == ASYNC_TOKEN_ABANDONED) { + /* Free the token, no one else will */ + opal_async_release_token(token); + return 0; + } + memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg)); + wake_up(&opal_async_wait); + + return 0; +} + +static struct notifier_block opal_async_comp_nb = { + .notifier_call = opal_async_comp_event, + .next = NULL, + .priority = 0, +}; + +int __init opal_async_comp_init(void) +{ + struct device_node *opal_node; + const __be32 *async; + int err; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_err("%s: Opal node not found\n", __func__); + err = -ENOENT; + goto out; + } + + async = of_get_property(opal_node, "opal-msg-async-num", NULL); + if (!async) { + pr_err("%s: %pOF has no opal-msg-async-num\n", + __func__, opal_node); + err = -ENOENT; + goto out_opal_node; + } + + opal_max_async_tokens = be32_to_cpup(async); + opal_async_tokens = kcalloc(opal_max_async_tokens, + sizeof(*opal_async_tokens), GFP_KERNEL); + if (!opal_async_tokens) { + err = -ENOMEM; + goto out_opal_node; + } + + err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, + &opal_async_comp_nb); + if (err) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, err); + kfree(opal_async_tokens); + goto out_opal_node; + } + + sema_init(&opal_async_sem, opal_max_async_tokens); + +out_opal_node: + of_node_put(opal_node); +out: + return err; +} diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c new file mode 100644 index 0000000000..021b0ec29e --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_TRACEPOINTS +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +static void __trace_opal_entry(s64 a0, s64 a1, s64 a2, s64 a3, + s64 a4, s64 a5, s64 a6, s64 a7, + unsigned long opcode) +{ + unsigned int *depth; + unsigned long args[8]; + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + return; + + args[0] = a0; + args[1] = a1; + args[2] = a2; + args[3] = a3; + args[4] = a4; + args[5] = a5; + args[6] = a6; + args[7] = a7; + + (*depth)++; + trace_opal_entry(opcode, &args[0]); + (*depth)--; +} + +static void __trace_opal_exit(unsigned long opcode, unsigned long retval) +{ + unsigned int *depth; + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + return; + + (*depth)++; + trace_opal_exit(opcode, retval); + (*depth)--; +} + +static DEFINE_STATIC_KEY_FALSE(opal_tracepoint_key); + +int opal_tracepoint_regfunc(void) +{ + static_branch_inc(&opal_tracepoint_key); + return 0; +} + +void opal_tracepoint_unregfunc(void) +{ + static_branch_dec(&opal_tracepoint_key); +} + +static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3, + s64 a4, s64 a5, s64 a6, s64 a7, + unsigned long opcode, unsigned long msr) +{ + s64 ret; + + __trace_opal_entry(a0, a1, a2, a3, a4, a5, a6, a7, opcode); + ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + __trace_opal_exit(opcode, ret); + + return ret; +} + +#define DO_TRACE (static_branch_unlikely(&opal_tracepoint_key)) + +#else /* CONFIG_TRACEPOINTS */ + +static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3, + s64 a4, s64 a5, s64 a6, s64 a7, + unsigned long opcode, unsigned long msr) +{ + return 0; +} + +#define DO_TRACE false +#endif /* CONFIG_TRACEPOINTS */ + +static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, + int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode) +{ + unsigned long flags; + unsigned long msr = mfmsr(); + bool mmu = (msr & (MSR_IR|MSR_DR)); + int64_t ret; + + /* OPAL call / firmware may use SRR and/or HSRR */ + srr_regs_clobbered(); + + msr &= ~MSR_EE; + + if (unlikely(!mmu)) + return __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + + local_save_flags(flags); + hard_irq_disable(); + + if (DO_TRACE) { + ret = __opal_call_trace(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + } else { + ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr); + } + + local_irq_restore(flags); + + return ret; +} + +#define OPAL_CALL(name, opcode) \ +int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ + int64_t a4, int64_t a5, int64_t a6, int64_t a7); \ +int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ + int64_t a4, int64_t a5, int64_t a6, int64_t a7) \ +{ \ + return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \ +} + +OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); +OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); +OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); +OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE); +OPAL_CALL(opal_rtc_read, OPAL_RTC_READ); +OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE); +OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN); +OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT); +OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2); +OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM); +OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM); +OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT); +OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS); +OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY); +OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY); +OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE); +OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD); +OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD); +OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); +OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); +OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); +OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); +OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); +OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); +OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); +OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR); +OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET); +OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT); +OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC); +OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE); +OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW); +OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW); +OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY); +OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE); +OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV); +OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE); +OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE); +OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE); +OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE); +OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32); +OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64); +OPAL_CALL(opal_start_cpu, OPAL_START_CPU); +OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS); +OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL); +OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW); +OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL); +OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET); +OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA); +OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA); +OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB); +OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT); +OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); +OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); +OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); +OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS); +OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); +OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); +OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); +OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); +OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2); +OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ); +OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE); +OPAL_CALL(opal_lpc_read, OPAL_LPC_READ); +OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE); +OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); +OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS); +OPAL_CALL(opal_read_elog, OPAL_ELOG_READ); +OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK); +OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE); +OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND); +OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE); +OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); +OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); +OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); +OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); +OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); +OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); +OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); +OPAL_CALL(opal_dump_read, OPAL_DUMP_READ); +OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK); +OPAL_CALL(opal_get_msg, OPAL_GET_MSG); +OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC); +OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); +OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND); +OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); +OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); +OPAL_CALL(opal_get_param, OPAL_GET_PARAM); +OPAL_CALL(opal_set_param, OPAL_SET_PARAM); +OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2); +OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); +OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); +OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); +OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); +OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE); +OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO); +OPAL_CALL(opal_tpo_read, OPAL_READ_TPO); +OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND); +OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV); +OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST); +OPAL_CALL(opal_flash_read, OPAL_FLASH_READ); +OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE); +OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); +OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); +OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); +OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); +OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); +OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE); +OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); +OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); +OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); +OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); +OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); +OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); +OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); +OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); +OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR); +OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET); +OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO); +OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG); +OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG); +OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO); +OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO); +OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE); +OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK); +OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK); +OPAL_CALL(opal_xive_allocate_irq_raw, OPAL_XIVE_ALLOCATE_IRQ); +OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ); +OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO); +OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); +OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); +OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); +OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE); +OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE); +OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE); +OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); +OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); +OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); +OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); +OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); +OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); +OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); +OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); +OPAL_CALL(opal_quiesce, OPAL_QUIESCE); +OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); +OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); +OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); +OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR); +OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64); +OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE); +OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT); +OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE); +OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG); +OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG); +OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET); +OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT); +OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE); diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c new file mode 100644 index 0000000000..bb7657115f --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -0,0 +1,663 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Interface for exporting the OPAL ELF core. + * Heavily inspired from fs/proc/vmcore.c + * + * Copyright 2019, Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "opal core: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "opal-fadump.h" + +#define MAX_PT_LOAD_CNT 8 + +/* NT_AUXV note related info */ +#define AUXV_CNT 1 +#define AUXV_DESC_SZ (((2 * AUXV_CNT) + 1) * sizeof(Elf64_Off)) + +struct opalcore_config { + u32 num_cpus; + /* PIR value of crashing CPU */ + u32 crashing_cpu; + + /* CPU state data info from F/W */ + u64 cpu_state_destination_vaddr; + u64 cpu_state_data_size; + u64 cpu_state_entry_size; + + /* OPAL memory to be exported as PT_LOAD segments */ + u64 ptload_addr[MAX_PT_LOAD_CNT]; + u64 ptload_size[MAX_PT_LOAD_CNT]; + u64 ptload_cnt; + + /* Pointer to the first PT_LOAD in the ELF core file */ + Elf64_Phdr *ptload_phdr; + + /* Total size of opalcore file. */ + size_t opalcore_size; + + /* Buffer for all the ELF core headers and the PT_NOTE */ + size_t opalcorebuf_sz; + char *opalcorebuf; + + /* NT_AUXV buffer */ + char auxv_buf[AUXV_DESC_SZ]; +}; + +struct opalcore { + struct list_head list; + u64 paddr; + size_t size; + loff_t offset; +}; + +static LIST_HEAD(opalcore_list); +static struct opalcore_config *oc_conf; +static const struct opal_mpipl_fadump *opalc_metadata; +static const struct opal_mpipl_fadump *opalc_cpu_metadata; +static struct kobject *mpipl_kobj; + +/* + * Set crashing CPU's signal to SIGUSR1. if the kernel is triggered + * by kernel, SIGTERM otherwise. + */ +bool kernel_initiated; + +static struct opalcore * __init get_new_element(void) +{ + return kzalloc(sizeof(struct opalcore), GFP_KERNEL); +} + +static inline int is_opalcore_usable(void) +{ + return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0; +} + +static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name, + u32 type, void *data, + size_t data_len) +{ + Elf64_Nhdr *note = (Elf64_Nhdr *)buf; + Elf64_Word namesz = strlen(name) + 1; + + note->n_namesz = cpu_to_be32(namesz); + note->n_descsz = cpu_to_be32(data_len); + note->n_type = cpu_to_be32(type); + buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf64_Word)); + memcpy(buf, name, namesz); + buf += DIV_ROUND_UP(namesz, sizeof(Elf64_Word)); + memcpy(buf, data, data_len); + buf += DIV_ROUND_UP(data_len, sizeof(Elf64_Word)); + + return buf; +} + +static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir, + struct pt_regs *regs) +{ + memset(prstatus, 0, sizeof(struct elf_prstatus)); + elf_core_copy_regs(&(prstatus->pr_reg), regs); + + /* + * Overload PID with PIR value. + * As a PIR value could also be '0', add an offset of '100' + * to every PIR to avoid misinterpretations in GDB. + */ + prstatus->common.pr_pid = cpu_to_be32(100 + pir); + prstatus->common.pr_ppid = cpu_to_be32(1); + + /* + * Indicate SIGUSR1 for crash initiated from kernel. + * SIGTERM otherwise. + */ + if (pir == oc_conf->crashing_cpu) { + short sig; + + sig = kernel_initiated ? SIGUSR1 : SIGTERM; + prstatus->common.pr_cursig = cpu_to_be16(sig); + } +} + +static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, + u64 opal_boot_entry) +{ + Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf; + int idx = 0; + + memset(bufp, 0, AUXV_DESC_SZ); + + /* Entry point of OPAL */ + bufp[idx++] = cpu_to_be64(AT_ENTRY); + bufp[idx++] = cpu_to_be64(opal_boot_entry); + + /* end of vector */ + bufp[idx++] = cpu_to_be64(AT_NULL); + + buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_AUXV, + oc_conf->auxv_buf, AUXV_DESC_SZ); + return buf; +} + +/* + * Read from the ELF header and then the crash dump. + * Returns number of bytes read on success, -errno on failure. + */ +static ssize_t read_opalcore(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + struct opalcore *m; + ssize_t tsz, avail; + loff_t tpos = pos; + + if (pos >= oc_conf->opalcore_size) + return 0; + + /* Adjust count if it goes beyond opalcore size */ + avail = oc_conf->opalcore_size - pos; + if (count > avail) + count = avail; + + if (count == 0) + return 0; + + /* Read ELF core header and/or PT_NOTE segment */ + if (tpos < oc_conf->opalcorebuf_sz) { + tsz = min_t(size_t, oc_conf->opalcorebuf_sz - tpos, count); + memcpy(to, oc_conf->opalcorebuf + tpos, tsz); + to += tsz; + tpos += tsz; + count -= tsz; + } + + list_for_each_entry(m, &opalcore_list, list) { + /* nothing more to read here */ + if (count == 0) + break; + + if (tpos < m->offset + m->size) { + void *addr; + + tsz = min_t(size_t, m->offset + m->size - tpos, count); + addr = (void *)(m->paddr + tpos - m->offset); + memcpy(to, __va(addr), tsz); + to += tsz; + tpos += tsz; + count -= tsz; + } + } + + return (tpos - pos); +} + +static struct bin_attribute opal_core_attr = { + .attr = {.name = "core", .mode = 0400}, + .read = read_opalcore +}; + +/* + * Read CPU state dump data and convert it into ELF notes. + * + * Each register entry is of 16 bytes, A numerical identifier along with + * a GPR/SPR flag in the first 8 bytes and the register value in the next + * 8 bytes. For more details refer to F/W documentation. + */ +static Elf64_Word * __init opalcore_append_cpu_notes(Elf64_Word *buf) +{ + u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize; + struct hdat_fadump_thread_hdr *thdr; + struct elf_prstatus prstatus; + Elf64_Word *first_cpu_note; + struct pt_regs regs; + char *bufp; + int i; + + size_per_thread = oc_conf->cpu_state_entry_size; + bufp = __va(oc_conf->cpu_state_destination_vaddr); + + /* + * Offset for register entries, entry size and registers count is + * duplicated in every thread header in keeping with HDAT format. + * Use these values from the first thread header. + */ + thdr = (struct hdat_fadump_thread_hdr *)bufp; + regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) + + be32_to_cpu(thdr->offset)); + reg_esize = be32_to_cpu(thdr->esize); + regs_cnt = be32_to_cpu(thdr->ecnt); + + pr_debug("--------CPU State Data------------\n"); + pr_debug("NumCpus : %u\n", oc_conf->num_cpus); + pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n", + regs_offset, reg_esize, regs_cnt); + + /* + * Skip past the first CPU note. Fill this note with the + * crashing CPU's prstatus. + */ + first_cpu_note = buf; + buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, + &prstatus, sizeof(prstatus)); + + for (i = 0; i < oc_conf->num_cpus; i++, bufp += size_per_thread) { + thdr = (struct hdat_fadump_thread_hdr *)bufp; + thread_pir = be32_to_cpu(thdr->pir); + + pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n", + i, thread_pir, thdr->core_state); + + /* + * Register state data of MAX cores is provided by firmware, + * but some of this cores may not be active. So, while + * processing register state data, check core state and + * skip threads that belong to inactive cores. + */ + if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE) + continue; + + opal_fadump_read_regs((bufp + regs_offset), regs_cnt, + reg_esize, false, ®s); + + pr_debug("PIR 0x%x - R1 : 0x%llx, NIP : 0x%llx\n", thread_pir, + be64_to_cpu(regs.gpr[1]), be64_to_cpu(regs.nip)); + fill_prstatus(&prstatus, thread_pir, ®s); + + if (thread_pir != oc_conf->crashing_cpu) { + buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, + NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + } else { + /* + * Add crashing CPU as the first NT_PRSTATUS note for + * GDB to process the core file appropriately. + */ + append_elf64_note(first_cpu_note, CRASH_CORE_NOTE_NAME, + NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + } + } + + return buf; +} + +static int __init create_opalcore(void) +{ + u64 opal_boot_entry, opal_base_addr, paddr; + u32 hdr_size, cpu_notes_size, count; + struct device_node *dn; + struct opalcore *new; + loff_t opalcore_off; + struct page *page; + Elf64_Phdr *phdr; + Elf64_Ehdr *elf; + int i, ret; + char *bufp; + + /* Get size of header & CPU notes for OPAL core */ + hdr_size = (sizeof(Elf64_Ehdr) + + ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr))); + cpu_notes_size = ((oc_conf->num_cpus * (CRASH_CORE_NOTE_HEAD_BYTES + + CRASH_CORE_NOTE_NAME_BYTES + + CRASH_CORE_NOTE_DESC_BYTES)) + + (CRASH_CORE_NOTE_HEAD_BYTES + + CRASH_CORE_NOTE_NAME_BYTES + AUXV_DESC_SZ)); + + /* Allocate buffer to setup OPAL core */ + oc_conf->opalcorebuf_sz = PAGE_ALIGN(hdr_size + cpu_notes_size); + oc_conf->opalcorebuf = alloc_pages_exact(oc_conf->opalcorebuf_sz, + GFP_KERNEL | __GFP_ZERO); + if (!oc_conf->opalcorebuf) { + pr_err("Not enough memory to setup OPAL core (size: %lu)\n", + oc_conf->opalcorebuf_sz); + oc_conf->opalcorebuf_sz = 0; + return -ENOMEM; + } + count = oc_conf->opalcorebuf_sz / PAGE_SIZE; + page = virt_to_page(oc_conf->opalcorebuf); + for (i = 0; i < count; i++) + mark_page_reserved(page + i); + + pr_debug("opalcorebuf = 0x%llx\n", (u64)oc_conf->opalcorebuf); + + /* Read OPAL related device-tree entries */ + dn = of_find_node_by_name(NULL, "ibm,opal"); + if (dn) { + ret = of_property_read_u64(dn, "opal-base-address", + &opal_base_addr); + pr_debug("opal-base-address: %llx\n", opal_base_addr); + ret |= of_property_read_u64(dn, "opal-boot-address", + &opal_boot_entry); + pr_debug("opal-boot-address: %llx\n", opal_boot_entry); + } + if (!dn || ret) + pr_warn("WARNING: Failed to read OPAL base & entry values\n"); + + of_node_put(dn); + + /* Use count to keep track of the program headers */ + count = 0; + + bufp = oc_conf->opalcorebuf; + elf = (Elf64_Ehdr *)bufp; + bufp += sizeof(Elf64_Ehdr); + memcpy(elf->e_ident, ELFMAG, SELFMAG); + elf->e_ident[EI_CLASS] = ELF_CLASS; + elf->e_ident[EI_DATA] = ELFDATA2MSB; + elf->e_ident[EI_VERSION] = EV_CURRENT; + elf->e_ident[EI_OSABI] = ELF_OSABI; + memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); + elf->e_type = cpu_to_be16(ET_CORE); + elf->e_machine = cpu_to_be16(ELF_ARCH); + elf->e_version = cpu_to_be32(EV_CURRENT); + elf->e_entry = 0; + elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr)); + elf->e_shoff = 0; + elf->e_flags = 0; + + elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr)); + elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr)); + elf->e_phnum = 0; + elf->e_shentsize = 0; + elf->e_shnum = 0; + elf->e_shstrndx = 0; + + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = cpu_to_be32(PT_NOTE); + phdr->p_flags = 0; + phdr->p_align = 0; + phdr->p_paddr = phdr->p_vaddr = 0; + phdr->p_offset = cpu_to_be64(hdr_size); + phdr->p_filesz = phdr->p_memsz = cpu_to_be64(cpu_notes_size); + count++; + + opalcore_off = oc_conf->opalcorebuf_sz; + oc_conf->ptload_phdr = (Elf64_Phdr *)bufp; + paddr = 0; + for (i = 0; i < oc_conf->ptload_cnt; i++) { + phdr = (Elf64_Phdr *)bufp; + bufp += sizeof(Elf64_Phdr); + phdr->p_type = cpu_to_be32(PT_LOAD); + phdr->p_flags = cpu_to_be32(PF_R|PF_W|PF_X); + phdr->p_align = 0; + + new = get_new_element(); + if (!new) + return -ENOMEM; + new->paddr = oc_conf->ptload_addr[i]; + new->size = oc_conf->ptload_size[i]; + new->offset = opalcore_off; + list_add_tail(&new->list, &opalcore_list); + + phdr->p_paddr = cpu_to_be64(paddr); + phdr->p_vaddr = cpu_to_be64(opal_base_addr + paddr); + phdr->p_filesz = phdr->p_memsz = + cpu_to_be64(oc_conf->ptload_size[i]); + phdr->p_offset = cpu_to_be64(opalcore_off); + + count++; + opalcore_off += oc_conf->ptload_size[i]; + paddr += oc_conf->ptload_size[i]; + } + + elf->e_phnum = cpu_to_be16(count); + + bufp = (char *)opalcore_append_cpu_notes((Elf64_Word *)bufp); + bufp = (char *)auxv_to_elf64_notes((Elf64_Word *)bufp, opal_boot_entry); + + oc_conf->opalcore_size = opalcore_off; + return 0; +} + +static void opalcore_cleanup(void) +{ + if (oc_conf == NULL) + return; + + /* Remove OPAL core sysfs file */ + sysfs_remove_bin_file(mpipl_kobj, &opal_core_attr); + oc_conf->ptload_phdr = NULL; + oc_conf->ptload_cnt = 0; + + /* free the buffer used for setting up OPAL core */ + if (oc_conf->opalcorebuf) { + void *end = (void *)((u64)oc_conf->opalcorebuf + + oc_conf->opalcorebuf_sz); + + free_reserved_area(oc_conf->opalcorebuf, end, -1, NULL); + oc_conf->opalcorebuf = NULL; + oc_conf->opalcorebuf_sz = 0; + } + + kfree(oc_conf); + oc_conf = NULL; +} +__exitcall(opalcore_cleanup); + +static void __init opalcore_config_init(void) +{ + u32 idx, cpu_data_version; + struct device_node *np; + const __be32 *prop; + u64 addr = 0; + int i, ret; + + np = of_find_node_by_path("/ibm,opal/dump"); + if (np == NULL) + return; + + if (!of_device_is_compatible(np, "ibm,opal-dump")) { + pr_warn("Support missing for this f/w version!\n"); + return; + } + + /* Check if dump has been initiated on last reboot */ + prop = of_get_property(np, "mpipl-boot", NULL); + if (!prop) { + of_node_put(np); + return; + } + + /* Get OPAL metadata */ + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_OPAL, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_err("Failed to get OPAL metadata (%d)\n", ret); + goto error_out; + } + + addr = be64_to_cpu(addr); + pr_debug("OPAL metadata addr: %llx\n", addr); + opalc_metadata = __va(addr); + + /* Get OPAL CPU metadata */ + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_err("Failed to get OPAL CPU metadata (%d)\n", ret); + goto error_out; + } + + addr = be64_to_cpu(addr); + pr_debug("CPU metadata addr: %llx\n", addr); + opalc_cpu_metadata = __va(addr); + + /* Allocate memory for config buffer */ + oc_conf = kzalloc(sizeof(struct opalcore_config), GFP_KERNEL); + if (oc_conf == NULL) + goto error_out; + + /* Parse OPAL metadata */ + if (opalc_metadata->version != OPAL_MPIPL_VERSION) { + pr_warn("Supported OPAL metadata version: %u, found: %u!\n", + OPAL_MPIPL_VERSION, opalc_metadata->version); + pr_warn("WARNING: F/W using newer OPAL metadata format!!\n"); + } + + oc_conf->ptload_cnt = 0; + idx = be32_to_cpu(opalc_metadata->region_cnt); + if (idx > MAX_PT_LOAD_CNT) { + pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)", + idx, MAX_PT_LOAD_CNT); + idx = MAX_PT_LOAD_CNT; + } + for (i = 0; i < idx; i++) { + oc_conf->ptload_addr[oc_conf->ptload_cnt] = + be64_to_cpu(opalc_metadata->region[i].dest); + oc_conf->ptload_size[oc_conf->ptload_cnt++] = + be64_to_cpu(opalc_metadata->region[i].size); + } + oc_conf->ptload_cnt = i; + oc_conf->crashing_cpu = be32_to_cpu(opalc_metadata->crashing_pir); + + if (!oc_conf->ptload_cnt) { + pr_err("OPAL memory regions not found\n"); + goto error_out; + } + + /* Parse OPAL CPU metadata */ + cpu_data_version = be32_to_cpu(opalc_cpu_metadata->cpu_data_version); + if (cpu_data_version != HDAT_FADUMP_CPU_DATA_VER) { + pr_warn("Supported CPU data version: %u, found: %u!\n", + HDAT_FADUMP_CPU_DATA_VER, cpu_data_version); + pr_warn("WARNING: F/W using newer CPU state data format!!\n"); + } + + addr = be64_to_cpu(opalc_cpu_metadata->region[0].dest); + if (!addr) { + pr_err("CPU state data not found!\n"); + goto error_out; + } + oc_conf->cpu_state_destination_vaddr = (u64)__va(addr); + + oc_conf->cpu_state_data_size = + be64_to_cpu(opalc_cpu_metadata->region[0].size); + oc_conf->cpu_state_entry_size = + be32_to_cpu(opalc_cpu_metadata->cpu_data_size); + + if ((oc_conf->cpu_state_entry_size == 0) || + (oc_conf->cpu_state_entry_size > oc_conf->cpu_state_data_size)) { + pr_err("CPU state data is invalid.\n"); + goto error_out; + } + oc_conf->num_cpus = (oc_conf->cpu_state_data_size / + oc_conf->cpu_state_entry_size); + + of_node_put(np); + return; + +error_out: + pr_err("Could not export /sys/firmware/opal/core\n"); + opalcore_cleanup(); + of_node_put(np); +} + +static ssize_t release_core_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int input = -1; + + if (kstrtoint(buf, 0, &input)) + return -EINVAL; + + if (input == 1) { + if (oc_conf == NULL) { + pr_err("'/sys/firmware/opal/core' file not accessible!\n"); + return -EPERM; + } + + /* + * Take away '/sys/firmware/opal/core' and release all memory + * used for exporting this file. + */ + opalcore_cleanup(); + } else + return -EINVAL; + + return count; +} + +static struct kobj_attribute opalcore_rel_attr = __ATTR_WO(release_core); + +static struct attribute *mpipl_attr[] = { + &opalcore_rel_attr.attr, + NULL, +}; + +static struct bin_attribute *mpipl_bin_attr[] = { + &opal_core_attr, + NULL, + +}; + +static const struct attribute_group mpipl_group = { + .attrs = mpipl_attr, + .bin_attrs = mpipl_bin_attr, +}; + +static int __init opalcore_init(void) +{ + int rc = -1; + + opalcore_config_init(); + + if (oc_conf == NULL) + return rc; + + create_opalcore(); + + /* + * If oc_conf->opalcorebuf= is set in the 2nd kernel, + * then capture the dump. + */ + if (!(is_opalcore_usable())) { + pr_err("Failed to export /sys/firmware/opal/mpipl/core\n"); + opalcore_cleanup(); + return rc; + } + + /* Set OPAL core file size */ + opal_core_attr.size = oc_conf->opalcore_size; + + mpipl_kobj = kobject_create_and_add("mpipl", opal_kobj); + if (!mpipl_kobj) { + pr_err("unable to create mpipl kobject\n"); + return -ENOMEM; + } + + /* Export OPAL core sysfs file */ + rc = sysfs_create_group(mpipl_kobj, &mpipl_group); + if (rc) { + pr_err("mpipl sysfs group creation failed (%d)", rc); + opalcore_cleanup(); + return rc; + } + /* The /sys/firmware/opal/core is moved to /sys/firmware/opal/mpipl/ + * directory, need to create symlink at old location to maintain + * backward compatibility. + */ + rc = compat_only_sysfs_link_entry_to_kobj(opal_kobj, mpipl_kobj, + "core", NULL); + if (rc) { + pr_err("unable to create core symlink (%d)\n", rc); + return rc; + } + + return 0; +} +fs_initcall(opalcore_init); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c new file mode 100644 index 0000000000..16c5860f13 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL Dump Interface + * + * Copyright 2013,2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DUMP_TYPE_FSP 0x01 + +struct dump_obj { + struct kobject kobj; + struct bin_attribute dump_attr; + uint32_t id; /* becomes object name */ + uint32_t type; + uint32_t size; + char *buffer; +}; +#define to_dump_obj(x) container_of(x, struct dump_obj, kobj) + +struct dump_attribute { + struct attribute attr; + ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr, + char *buf); + ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr, + const char *buf, size_t count); +}; +#define to_dump_attr(x) container_of(x, struct dump_attribute, attr) + +static ssize_t dump_id_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%x\n", dump_obj->id); +} + +static const char* dump_type_to_string(uint32_t type) +{ + switch (type) { + case 0x01: return "SP Dump"; + case 0x02: return "System/Platform Dump"; + case 0x03: return "SMA Dump"; + default: return "unknown"; + } +} + +static ssize_t dump_type_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + + return sprintf(buf, "0x%x %s\n", dump_obj->type, + dump_type_to_string(dump_obj->type)); +} + +static ssize_t dump_ack_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "ack - acknowledge dump\n"); +} + +/* + * Send acknowledgement to OPAL + */ +static int64_t dump_send_ack(uint32_t dump_id) +{ + int rc; + + rc = opal_dump_ack(dump_id); + if (rc) + pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n", + __func__, dump_id, rc); + return rc; +} + +static ssize_t dump_ack_store(struct dump_obj *dump_obj, + struct dump_attribute *attr, + const char *buf, + size_t count) +{ + /* + * Try to self remove this attribute. If we are successful, + * delete the kobject itself. + */ + if (sysfs_remove_file_self(&dump_obj->kobj, &attr->attr)) { + dump_send_ack(dump_obj->id); + kobject_put(&dump_obj->kobj); + } + return count; +} + +/* Attributes of a dump + * The binary attribute of the dump itself is dynamic + * due to the dynamic size of the dump + */ +static struct dump_attribute id_attribute = + __ATTR(id, 0444, dump_id_show, NULL); +static struct dump_attribute type_attribute = + __ATTR(type, 0444, dump_type_show, NULL); +static struct dump_attribute ack_attribute = + __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); + +static ssize_t init_dump_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n"); +} + +static int64_t dump_fips_init(uint8_t type) +{ + int rc; + + rc = opal_dump_init(type); + if (rc) + pr_warn("%s: Failed to initiate FSP dump (%d)\n", + __func__, rc); + return rc; +} + +static ssize_t init_dump_store(struct dump_obj *dump_obj, + struct dump_attribute *attr, + const char *buf, + size_t count) +{ + int rc; + + rc = dump_fips_init(DUMP_TYPE_FSP); + if (rc == OPAL_SUCCESS) + pr_info("%s: Initiated FSP dump\n", __func__); + + return count; +} + +static struct dump_attribute initiate_attribute = + __ATTR(initiate_dump, 0600, init_dump_show, init_dump_store); + +static struct attribute *initiate_attrs[] = { + &initiate_attribute.attr, + NULL, +}; + +static const struct attribute_group initiate_attr_group = { + .attrs = initiate_attrs, +}; + +static struct kset *dump_kset; + +static ssize_t dump_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct dump_attribute *attribute; + struct dump_obj *dump; + + attribute = to_dump_attr(attr); + dump = to_dump_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(dump, attribute, buf); +} + +static ssize_t dump_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct dump_attribute *attribute; + struct dump_obj *dump; + + attribute = to_dump_attr(attr); + dump = to_dump_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(dump, attribute, buf, len); +} + +static const struct sysfs_ops dump_sysfs_ops = { + .show = dump_attr_show, + .store = dump_attr_store, +}; + +static void dump_release(struct kobject *kobj) +{ + struct dump_obj *dump; + + dump = to_dump_obj(kobj); + vfree(dump->buffer); + kfree(dump); +} + +static struct attribute *dump_default_attrs[] = { + &id_attribute.attr, + &type_attribute.attr, + &ack_attribute.attr, + NULL, +}; +ATTRIBUTE_GROUPS(dump_default); + +static struct kobj_type dump_ktype = { + .sysfs_ops = &dump_sysfs_ops, + .release = &dump_release, + .default_groups = dump_default_groups, +}; + +static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type) +{ + __be32 id, size, type; + int rc; + + type = cpu_to_be32(0xffffffff); + + rc = opal_dump_info2(&id, &size, &type); + if (rc == OPAL_PARAMETER) + rc = opal_dump_info(&id, &size); + + if (rc) { + pr_warn("%s: Failed to get dump info (%d)\n", + __func__, rc); + return rc; + } + + *dump_id = be32_to_cpu(id); + *dump_size = be32_to_cpu(size); + *dump_type = be32_to_cpu(type); + + return rc; +} + +static int64_t dump_read_data(struct dump_obj *dump) +{ + struct opal_sg_list *list; + uint64_t addr; + int64_t rc; + + /* Allocate memory */ + dump->buffer = vzalloc(PAGE_ALIGN(dump->size)); + if (!dump->buffer) { + pr_err("%s : Failed to allocate memory\n", __func__); + rc = -ENOMEM; + goto out; + } + + /* Generate SG list */ + list = opal_vmalloc_to_sg_list(dump->buffer, dump->size); + if (!list) { + rc = -ENOMEM; + goto out; + } + + /* First entry address */ + addr = __pa(list); + + /* Fetch data */ + rc = OPAL_BUSY_EVENT; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_dump_read(dump->id, addr); + if (rc == OPAL_BUSY_EVENT) { + opal_poll_events(NULL); + msleep(20); + } + } + + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) + pr_warn("%s: Extract dump failed for ID 0x%x\n", + __func__, dump->id); + + /* Free SG list */ + opal_free_sg_list(list); + +out: + return rc; +} + +static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + ssize_t rc; + + struct dump_obj *dump = to_dump_obj(kobj); + + if (!dump->buffer) { + rc = dump_read_data(dump); + + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) { + vfree(dump->buffer); + dump->buffer = NULL; + + return -EIO; + } + if (rc == OPAL_PARTIAL) { + /* On a partial read, we just return EIO + * and rely on userspace to ask us to try + * again. + */ + pr_info("%s: Platform dump partially read. ID = 0x%x\n", + __func__, dump->id); + return -EIO; + } + } + + memcpy(buffer, dump->buffer + pos, count); + + /* You may think we could free the dump buffer now and retrieve + * it again later if needed, but due to current firmware limitation, + * that's not the case. So, once read into userspace once, + * we keep the dump around until it's acknowledged by userspace. + */ + + return count; +} + +static void create_dump_obj(uint32_t id, size_t size, uint32_t type) +{ + struct dump_obj *dump; + int rc; + + dump = kzalloc(sizeof(*dump), GFP_KERNEL); + if (!dump) + return; + + dump->kobj.kset = dump_kset; + + kobject_init(&dump->kobj, &dump_ktype); + + sysfs_bin_attr_init(&dump->dump_attr); + + dump->dump_attr.attr.name = "dump"; + dump->dump_attr.attr.mode = 0400; + dump->dump_attr.size = size; + dump->dump_attr.read = dump_attr_read; + + dump->id = id; + dump->size = size; + dump->type = type; + + rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); + if (rc) { + kobject_put(&dump->kobj); + return; + } + + /* + * As soon as the sysfs file for this dump is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the dump before kobject_uevent() is called. If that + * happens then there is a potential race between + * dump_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&dump->kobj); + rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); + if (rc == 0) { + kobject_uevent(&dump->kobj, KOBJ_ADD); + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + } else { + /* Drop reference count taken for bin file */ + kobject_put(&dump->kobj); + } + + /* Drop our reference */ + kobject_put(&dump->kobj); + return; +} + +static irqreturn_t process_dump(int irq, void *data) +{ + int rc; + uint32_t dump_id, dump_size, dump_type; + char name[22]; + struct kobject *kobj; + + rc = dump_read_info(&dump_id, &dump_size, &dump_type); + if (rc != OPAL_SUCCESS) + return IRQ_HANDLED; + + sprintf(name, "0x%x-0x%x", dump_type, dump_id); + + /* we may get notified twice, let's handle + * that gracefully and not create two conflicting + * entries. + */ + kobj = kset_find_obj(dump_kset, name); + if (kobj) { + /* Drop reference added by kset_find_obj() */ + kobject_put(kobj); + return IRQ_HANDLED; + } + + create_dump_obj(dump_id, dump_size, dump_type); + + return IRQ_HANDLED; +} + +void __init opal_platform_dump_init(void) +{ + int rc; + int dump_irq; + + /* Dump not supported by firmware */ + if (!opal_check_token(OPAL_DUMP_READ)) + return; + + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); + if (!dump_kset) { + pr_warn("%s: Failed to create dump kset\n", __func__); + return; + } + + rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group); + if (rc) { + pr_warn("%s: Failed to create initiate dump attr group\n", + __func__); + kobject_put(&dump_kset->kobj); + return; + } + + dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL)); + if (!dump_irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, dump_irq); + return; + } + + rc = request_threaded_irq(dump_irq, NULL, process_dump, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, + "opal-dump", NULL); + if (rc) { + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); + return; + } + + if (opal_check_token(OPAL_DUMP_RESEND)) + opal_dump_resend_notification(); +} diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c new file mode 100644 index 0000000000..554fdd7f88 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Error log support on PowerNV. + * + * Copyright 2013,2014 IBM Corp. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct elog_obj { + struct kobject kobj; + struct bin_attribute raw_attr; + uint64_t id; + uint64_t type; + size_t size; + char *buffer; +}; +#define to_elog_obj(x) container_of(x, struct elog_obj, kobj) + +struct elog_attribute { + struct attribute attr; + ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr, + char *buf); + ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr, + const char *buf, size_t count); +}; +#define to_elog_attr(x) container_of(x, struct elog_attribute, attr) + +static ssize_t elog_id_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%llx\n", elog_obj->id); +} + +static const char *elog_type_to_string(uint64_t type) +{ + switch (type) { + case 0: return "PEL"; + default: return "unknown"; + } +} + +static ssize_t elog_type_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%llx %s\n", + elog_obj->type, + elog_type_to_string(elog_obj->type)); +} + +static ssize_t elog_ack_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "ack - acknowledge log message\n"); +} + +static ssize_t elog_ack_store(struct elog_obj *elog_obj, + struct elog_attribute *attr, + const char *buf, + size_t count) +{ + /* + * Try to self remove this attribute. If we are successful, + * delete the kobject itself. + */ + if (sysfs_remove_file_self(&elog_obj->kobj, &attr->attr)) { + opal_send_ack_elog(elog_obj->id); + kobject_put(&elog_obj->kobj); + } + return count; +} + +static struct elog_attribute id_attribute = + __ATTR(id, 0444, elog_id_show, NULL); +static struct elog_attribute type_attribute = + __ATTR(type, 0444, elog_type_show, NULL); +static struct elog_attribute ack_attribute = + __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); + +static struct kset *elog_kset; + +static ssize_t elog_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct elog_attribute *attribute; + struct elog_obj *elog; + + attribute = to_elog_attr(attr); + elog = to_elog_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(elog, attribute, buf); +} + +static ssize_t elog_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct elog_attribute *attribute; + struct elog_obj *elog; + + attribute = to_elog_attr(attr); + elog = to_elog_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(elog, attribute, buf, len); +} + +static const struct sysfs_ops elog_sysfs_ops = { + .show = elog_attr_show, + .store = elog_attr_store, +}; + +static void elog_release(struct kobject *kobj) +{ + struct elog_obj *elog; + + elog = to_elog_obj(kobj); + kfree(elog->buffer); + kfree(elog); +} + +static struct attribute *elog_default_attrs[] = { + &id_attribute.attr, + &type_attribute.attr, + &ack_attribute.attr, + NULL, +}; +ATTRIBUTE_GROUPS(elog_default); + +static struct kobj_type elog_ktype = { + .sysfs_ops = &elog_sysfs_ops, + .release = &elog_release, + .default_groups = elog_default_groups, +}; + +/* Maximum size of a single log on FSP is 16KB */ +#define OPAL_MAX_ERRLOG_SIZE 16384 + +static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + int opal_rc; + + struct elog_obj *elog = to_elog_obj(kobj); + + /* We may have had an error reading before, so let's retry */ + if (!elog->buffer) { + elog->buffer = kzalloc(elog->size, GFP_KERNEL); + if (!elog->buffer) + return -EIO; + + opal_rc = opal_read_elog(__pa(elog->buffer), + elog->size, elog->id); + if (opal_rc != OPAL_SUCCESS) { + pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n", + elog->id); + kfree(elog->buffer); + elog->buffer = NULL; + return -EIO; + } + } + + memcpy(buffer, elog->buffer + pos, count); + + return count; +} + +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) +{ + struct elog_obj *elog; + int rc; + + elog = kzalloc(sizeof(*elog), GFP_KERNEL); + if (!elog) + return; + + elog->kobj.kset = elog_kset; + + kobject_init(&elog->kobj, &elog_ktype); + + sysfs_bin_attr_init(&elog->raw_attr); + + elog->raw_attr.attr.name = "raw"; + elog->raw_attr.attr.mode = 0400; + elog->raw_attr.size = size; + elog->raw_attr.read = raw_attr_read; + + elog->id = id; + elog->size = size; + elog->type = type; + + elog->buffer = kzalloc(elog->size, GFP_KERNEL); + + if (elog->buffer) { + rc = opal_read_elog(__pa(elog->buffer), + elog->size, elog->id); + if (rc != OPAL_SUCCESS) { + pr_err("ELOG: log read failed for log-id=%llx\n", + elog->id); + kfree(elog->buffer); + elog->buffer = NULL; + } + } + + rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); + if (rc) { + kobject_put(&elog->kobj); + return; + } + + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); + rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ + kobject_put(&elog->kobj); + } + + /* Drop our reference */ + kobject_put(&elog->kobj); + + return; +} + +static irqreturn_t elog_event(int irq, void *data) +{ + __be64 size; + __be64 id; + __be64 type; + uint64_t elog_size; + uint64_t log_id; + uint64_t elog_type; + int rc; + char name[2+16+1]; + struct kobject *kobj; + + rc = opal_get_elog_size(&id, &size, &type); + if (rc != OPAL_SUCCESS) { + pr_err("ELOG: OPAL log info read failed\n"); + return IRQ_HANDLED; + } + + elog_size = be64_to_cpu(size); + log_id = be64_to_cpu(id); + elog_type = be64_to_cpu(type); + + WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + + if (elog_size >= OPAL_MAX_ERRLOG_SIZE) + elog_size = OPAL_MAX_ERRLOG_SIZE; + + sprintf(name, "0x%llx", log_id); + + /* we may get notified twice, let's handle + * that gracefully and not create two conflicting + * entries. + */ + kobj = kset_find_obj(elog_kset, name); + if (kobj) { + /* Drop reference added by kset_find_obj() */ + kobject_put(kobj); + return IRQ_HANDLED; + } + + create_elog_obj(log_id, elog_size, elog_type); + + return IRQ_HANDLED; +} + +int __init opal_elog_init(void) +{ + int rc = 0, irq; + + /* ELOG not supported by firmware */ + if (!opal_check_token(OPAL_ELOG_READ)) + return -1; + + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); + if (!elog_kset) { + pr_warn("%s: failed to create elog kset\n", __func__); + return -1; + } + + irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); + if (rc) { + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, rc); + return rc; + } + + /* We are now ready to pull error logs from opal. */ + if (opal_check_token(OPAL_ELOG_RESEND)) + opal_resend_pending_logs(); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c new file mode 100644 index 0000000000..964f464b1b --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -0,0 +1,726 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Firmware-Assisted Dump support on POWER platform (OPAL). + * + * Copyright 2019, Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "opal fadump: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "opal-fadump.h" + + +#ifdef CONFIG_PRESERVE_FA_DUMP +/* + * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel, + * ensure crash data is preserved in hope that the subsequent memory + * preserving kernel boot is going to process this crash data. + */ +void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) +{ + const struct opal_fadump_mem_struct *opal_fdm_active; + const __be32 *prop; + unsigned long dn; + u64 addr = 0; + s64 ret; + + dn = of_get_flat_dt_subnode_by_name(node, "dump"); + if (dn == -FDT_ERR_NOTFOUND) + return; + + /* + * Check if dump has been initiated on last reboot. + */ + prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); + if (!prop) + return; + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_debug("Could not get Kernel metadata (%lld)\n", ret); + return; + } + + /* + * Preserve memory only if kernel memory regions are registered + * with f/w for MPIPL. + */ + addr = be64_to_cpu(addr); + pr_debug("Kernel metadata addr: %llx\n", addr); + opal_fdm_active = (void *)addr; + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) + return; + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); + if ((ret != OPAL_SUCCESS) || !addr) { + pr_err("Failed to get boot memory tag (%lld)\n", ret); + return; + } + + /* + * Memory below this address can be used for booting a + * capture kernel or petitboot kernel. Preserve everything + * above this address for processing crashdump. + */ + fadump_conf->boot_mem_top = be64_to_cpu(addr); + pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top); + + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; +} + +#else /* CONFIG_PRESERVE_FA_DUMP */ +static const struct opal_fadump_mem_struct *opal_fdm_active; +static const struct opal_mpipl_fadump *opal_cpu_metadata; +static struct opal_fadump_mem_struct *opal_fdm; + +#ifdef CONFIG_OPAL_CORE +extern bool kernel_initiated; +#endif + +static int opal_fadump_unregister(struct fw_dump *fadump_conf); + +static void opal_fadump_update_config(struct fw_dump *fadump_conf, + const struct opal_fadump_mem_struct *fdm) +{ + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); + + /* + * The destination address of the first boot memory region is the + * destination address of boot memory regions. + */ + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); + pr_debug("Destination address of boot memory regions: %#016llx\n", + fadump_conf->boot_mem_dest_addr); + + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); +} + +/* + * This function is called in the capture kernel to get configuration details + * from metadata setup by the first kernel. + */ +static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, + const struct opal_fadump_mem_struct *fdm) +{ + unsigned long base, size, last_end, hole_size; + int i; + + if (!fadump_conf->dump_active) + return; + + last_end = 0; + hole_size = 0; + fadump_conf->boot_memory_size = 0; + + pr_debug("Boot memory regions:\n"); + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); + pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); + + fadump_conf->boot_mem_addr[i] = base; + fadump_conf->boot_mem_sz[i] = size; + fadump_conf->boot_memory_size += size; + hole_size += (base - last_end); + + last_end = base + size; + } + + /* + * Start address of reserve dump area (permanent reservation) for + * re-registering FADump after dump capture. + */ + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); + + /* + * Rarely, but it can so happen that system crashes before all + * boot memory regions are registered for MPIPL. In such + * cases, warn that the vmcore may not be accurate and proceed + * anyway as that is the best bet considering free pages, cache + * pages, user pages, etc are usually filtered out. + * + * Hope the memory that could not be preserved only has pages + * that are usually filtered out while saving the vmcore. + */ + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { + pr_warn("Not all memory regions were saved!!!\n"); + pr_warn(" Unsaved memory regions:\n"); + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { + pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); + i++; + } + + pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n"); + pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n"); + } + + fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); + opal_fadump_update_config(fadump_conf, fdm); +} + +/* Initialize kernel metadata */ +static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) +{ + fdm->version = OPAL_FADUMP_VERSION; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); +} + +static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) +{ + u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; + int i; + + opal_fdm = __va(fadump_conf->kernel_metadata); + opal_fadump_init_metadata(opal_fdm); + + /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); + for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); + + reg_cnt++; + addr += fadump_conf->boot_mem_sz[i]; + } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); + + /* + * Kernel metadata is passed to f/w and retrieved in capture kernel. + * So, use it to save fadump header address instead of calculating it. + */ + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); + + opal_fadump_update_config(fadump_conf, opal_fdm); + + return addr; +} + +static u64 opal_fadump_get_metadata_size(void) +{ + return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct)); +} + +static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf) +{ + int err = 0; + s64 ret; + + /* + * Use the last page(s) in FADump memory reservation for + * kernel metadata. + */ + fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start + + fadump_conf->reserve_dump_area_size - + opal_fadump_get_metadata_size()); + pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata); + + /* Initialize kernel metadata before registering the address with f/w */ + opal_fdm = __va(fadump_conf->kernel_metadata); + opal_fadump_init_metadata(opal_fdm); + + /* + * Register metadata address with f/w. Can be retrieved in + * the capture kernel. + */ + ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, + fadump_conf->kernel_metadata); + if (ret != OPAL_SUCCESS) { + pr_err("Failed to set kernel metadata tag!\n"); + err = -EPERM; + } + + /* + * Register boot memory top address with f/w. Should be retrieved + * by a kernel that intends to preserve crash'ed kernel's memory. + */ + ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM, + fadump_conf->boot_mem_top); + if (ret != OPAL_SUCCESS) { + pr_err("Failed to set boot memory tag!\n"); + err = -EPERM; + } + + return err; +} + +static u64 opal_fadump_get_bootmem_min(void) +{ + return OPAL_FADUMP_MIN_BOOT_MEM; +} + +static int opal_fadump_register(struct fw_dump *fadump_conf) +{ + s64 rc = OPAL_PARAMETER; + u16 registered_regs; + int i, err = -EIO; + + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { + rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); + if (rc != OPAL_SUCCESS) + break; + + registered_regs++; + } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); + + switch (rc) { + case OPAL_SUCCESS: + pr_info("Registration is successful!\n"); + fadump_conf->dump_registered = 1; + err = 0; + break; + case OPAL_RESOURCE: + /* If MAX regions limit in f/w is hit, warn and proceed. */ + pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); + fadump_conf->dump_registered = 1; + err = 0; + break; + case OPAL_PARAMETER: + pr_err("Failed to register. Parameter Error(%lld).\n", rc); + break; + case OPAL_HARDWARE: + pr_err("Support not available.\n"); + fadump_conf->fadump_supported = 0; + fadump_conf->fadump_enabled = 0; + break; + default: + pr_err("Failed to register. Unknown Error(%lld).\n", rc); + break; + } + + /* + * If some regions were registered before OPAL_MPIPL_ADD_RANGE + * OPAL call failed, unregister all regions. + */ + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) + opal_fadump_unregister(fadump_conf); + + return err; +} + +static int opal_fadump_unregister(struct fw_dump *fadump_conf) +{ + s64 rc; + + rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0); + if (rc) { + pr_err("Failed to un-register - unexpected Error(%lld).\n", rc); + return -EIO; + } + + opal_fdm->registered_regions = cpu_to_be16(0); + fadump_conf->dump_registered = 0; + return 0; +} + +static int opal_fadump_invalidate(struct fw_dump *fadump_conf) +{ + s64 rc; + + rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0); + if (rc) { + pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc); + return -EIO; + } + + fadump_conf->dump_active = 0; + opal_fdm_active = NULL; + return 0; +} + +static void opal_fadump_cleanup(struct fw_dump *fadump_conf) +{ + s64 ret; + + ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0); + if (ret != OPAL_SUCCESS) + pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret); +} + +/* + * Verify if CPU state data is available. If available, do a bit of sanity + * checking before processing this data. + */ +static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf) +{ + if (!opal_cpu_metadata) + return false; + + fadump_conf->cpu_state_data_version = + be32_to_cpu(opal_cpu_metadata->cpu_data_version); + fadump_conf->cpu_state_entry_size = + be32_to_cpu(opal_cpu_metadata->cpu_data_size); + fadump_conf->cpu_state_dest_vaddr = + (u64)__va(be64_to_cpu(opal_cpu_metadata->region[0].dest)); + fadump_conf->cpu_state_data_size = + be64_to_cpu(opal_cpu_metadata->region[0].size); + + if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) { + pr_warn("Supported CPU state data version: %u, found: %d!\n", + HDAT_FADUMP_CPU_DATA_VER, + fadump_conf->cpu_state_data_version); + pr_warn("WARNING: F/W using newer CPU state data format!!\n"); + } + + if ((fadump_conf->cpu_state_dest_vaddr == 0) || + (fadump_conf->cpu_state_entry_size == 0) || + (fadump_conf->cpu_state_entry_size > + fadump_conf->cpu_state_data_size)) { + pr_err("CPU state data is invalid. Ignoring!\n"); + return false; + } + + return true; +} + +/* + * Convert CPU state data saved at the time of crash into ELF notes. + * + * While the crashing CPU's register data is saved by the kernel, CPU state + * data for all CPUs is saved by f/w. In CPU state data provided by f/w, + * each register entry is of 16 bytes, a numerical identifier along with + * a GPR/SPR flag in the first 8 bytes and the register value in the next + * 8 bytes. For more details refer to F/W documentation. If this data is + * missing or in unsupported format, append crashing CPU's register data + * saved by the kernel in the PT_NOTE, to have something to work with in + * the vmcore file. + */ +static int __init +opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf, + struct fadump_crash_info_header *fdh) +{ + u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize; + struct hdat_fadump_thread_hdr *thdr; + bool is_cpu_data_valid = false; + u32 num_cpus = 1, *note_buf; + struct pt_regs regs; + char *bufp; + int rc, i; + + if (is_opal_fadump_cpu_data_valid(fadump_conf)) { + size_per_thread = fadump_conf->cpu_state_entry_size; + num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread); + bufp = __va(fadump_conf->cpu_state_dest_vaddr); + is_cpu_data_valid = true; + } + + rc = fadump_setup_cpu_notes_buf(num_cpus); + if (rc != 0) + return rc; + + note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr; + if (!is_cpu_data_valid) + goto out; + + /* + * Offset for register entries, entry size and registers count is + * duplicated in every thread header in keeping with HDAT format. + * Use these values from the first thread header. + */ + thdr = (struct hdat_fadump_thread_hdr *)bufp; + regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) + + be32_to_cpu(thdr->offset)); + reg_esize = be32_to_cpu(thdr->esize); + regs_cnt = be32_to_cpu(thdr->ecnt); + + pr_debug("--------CPU State Data------------\n"); + pr_debug("NumCpus : %u\n", num_cpus); + pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n", + regs_offset, reg_esize, regs_cnt); + + for (i = 0; i < num_cpus; i++, bufp += size_per_thread) { + thdr = (struct hdat_fadump_thread_hdr *)bufp; + + thread_pir = be32_to_cpu(thdr->pir); + pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n", + i, thread_pir, thdr->core_state); + + /* + * If this is kernel initiated crash, crashing_cpu would be set + * appropriately and register data of the crashing CPU saved by + * crashing kernel. Add this saved register data of crashing CPU + * to elf notes and populate the pt_regs for the remaining CPUs + * from register state data provided by firmware. + */ + if (fdh->crashing_cpu == thread_pir) { + note_buf = fadump_regs_to_elf_notes(note_buf, + &fdh->regs); + pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n", + fdh->crashing_cpu, fdh->regs.gpr[1], + fdh->regs.nip); + continue; + } + + /* + * Register state data of MAX cores is provided by firmware, + * but some of this cores may not be active. So, while + * processing register state data, check core state and + * skip threads that belong to inactive cores. + */ + if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE) + continue; + + opal_fadump_read_regs((bufp + regs_offset), regs_cnt, + reg_esize, true, ®s); + note_buf = fadump_regs_to_elf_notes(note_buf, ®s); + pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n", + thread_pir, regs.gpr[1], regs.nip); + } + +out: + /* + * CPU state data is invalid/unsupported. Try appending crashing CPU's + * register data, if it is saved by the kernel. + */ + if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) { + if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) { + fadump_free_cpu_notes_buf(); + return -ENODEV; + } + + pr_warn("WARNING: appending only crashing CPU's register data\n"); + note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs)); + } + + final_note(note_buf); + + pr_debug("Updating elfcore header (%llx) with cpu notes\n", + fdh->elfcorehdr_addr); + fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); + return 0; +} + +static int __init opal_fadump_process(struct fw_dump *fadump_conf) +{ + struct fadump_crash_info_header *fdh; + int rc = -EINVAL; + + if (!opal_fdm_active || !fadump_conf->fadumphdr_addr) + return rc; + + /* Validate the fadump crash info header */ + fdh = __va(fadump_conf->fadumphdr_addr); + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + pr_err("Crash info header is not valid.\n"); + return rc; + } + +#ifdef CONFIG_OPAL_CORE + /* + * If this is a kernel initiated crash, crashing_cpu would be set + * appropriately and register data of the crashing CPU saved by + * crashing kernel. Add this saved register data of crashing CPU + * to elf notes and populate the pt_regs for the remaining CPUs + * from register state data provided by firmware. + */ + if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN) + kernel_initiated = true; +#endif + + rc = opal_fadump_build_cpu_notes(fadump_conf, fdh); + if (rc) + return rc; + + /* + * We are done validating dump info and elfcore header is now ready + * to be exported. set elfcorehdr_addr so that vmcore module will + * export the elfcore header through '/proc/vmcore'. + */ + elfcorehdr_addr = fdh->elfcorehdr_addr; + + return rc; +} + +static void opal_fadump_region_show(struct fw_dump *fadump_conf, + struct seq_file *m) +{ + const struct opal_fadump_mem_struct *fdm_ptr; + u64 dumped_bytes = 0; + int i; + + if (fadump_conf->dump_active) + fdm_ptr = opal_fdm_active; + else + fdm_ptr = opal_fdm; + + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { + /* + * Only regions that are registered for MPIPL + * would have dump data. + */ + if ((fadump_conf->dump_active) && + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); + + seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); + seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); + } + + /* Dump is active. Show preserved area start address. */ + if (fadump_conf->dump_active) { + seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n", + fadump_conf->boot_mem_top); + } +} + +static void opal_fadump_trigger(struct fadump_crash_info_header *fdh, + const char *msg) +{ + int rc; + + /* + * Unlike on pSeries platform, logical CPU number is not provided + * with architected register state data. So, store the crashing + * CPU's PIR instead to plug the appropriate register data for + * crashing CPU in the vmcore file. + */ + fdh->crashing_cpu = (u32)mfspr(SPRN_PIR); + + rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg); + if (rc == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported.\n", + OPAL_REBOOT_MPIPL); + } else if (rc == OPAL_HARDWARE) + pr_emerg("No backend support for MPIPL!\n"); +} + +static struct fadump_ops opal_fadump_ops = { + .fadump_init_mem_struct = opal_fadump_init_mem_struct, + .fadump_get_metadata_size = opal_fadump_get_metadata_size, + .fadump_setup_metadata = opal_fadump_setup_metadata, + .fadump_get_bootmem_min = opal_fadump_get_bootmem_min, + .fadump_register = opal_fadump_register, + .fadump_unregister = opal_fadump_unregister, + .fadump_invalidate = opal_fadump_invalidate, + .fadump_cleanup = opal_fadump_cleanup, + .fadump_process = opal_fadump_process, + .fadump_region_show = opal_fadump_region_show, + .fadump_trigger = opal_fadump_trigger, +}; + +void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) +{ + const __be32 *prop; + unsigned long dn; + __be64 be_addr; + u64 addr = 0; + int i, len; + s64 ret; + + /* + * Check if Firmware-Assisted Dump is supported. if yes, check + * if dump has been initiated on last reboot. + */ + dn = of_get_flat_dt_subnode_by_name(node, "dump"); + if (dn == -FDT_ERR_NOTFOUND) { + pr_debug("FADump support is missing!\n"); + return; + } + + if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) { + pr_err("Support missing for this f/w version!\n"); + return; + } + + prop = of_get_flat_dt_prop(dn, "fw-load-area", &len); + if (prop) { + /* + * Each f/w load area is an (address,size) pair, + * 2 cells each, totalling 4 cells per range. + */ + for (i = 0; i < len / (sizeof(*prop) * 4); i++) { + u64 base, end; + + base = of_read_number(prop + (i * 4) + 0, 2); + end = base; + end += of_read_number(prop + (i * 4) + 2, 2); + if (end > OPAL_FADUMP_MIN_BOOT_MEM) { + pr_err("F/W load area: 0x%llx-0x%llx\n", + base, end); + pr_err("F/W version not supported!\n"); + return; + } + } + } + + fadump_conf->ops = &opal_fadump_ops; + fadump_conf->fadump_supported = 1; + + /* + * Firmware supports 32-bit field for size. Align it to PAGE_SIZE + * and request firmware to copy multiple kernel boot memory regions. + */ + fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE); + + /* + * Check if dump has been initiated on last reboot. + */ + prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL); + if (!prop) + return; + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { + pr_err("Failed to get Kernel metadata (%lld)\n", ret); + return; + } + + addr = be64_to_cpu(be_addr); + pr_debug("Kernel metadata addr: %llx\n", addr); + + opal_fdm_active = __va(addr); + if (opal_fdm_active->version != OPAL_FADUMP_VERSION) { + pr_warn("Supported kernel metadata version: %u, found: %d!\n", + OPAL_FADUMP_VERSION, opal_fdm_active->version); + pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n"); + } + + /* Kernel regions not registered with f/w for MPIPL */ + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { + opal_fdm_active = NULL; + return; + } + + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); + pr_debug("CPU metadata addr: %llx\n", addr); + opal_cpu_metadata = __va(addr); + } + + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; + opal_fadump_get_config(fadump_conf, opal_fdm_active); +} +#endif /* !CONFIG_PRESERVE_FA_DUMP */ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h new file mode 100644 index 0000000000..3f715efb0a --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Firmware-Assisted Dump support on POWER platform (OPAL). + * + * Copyright 2019, Hari Bathini, IBM Corporation. + */ + +#ifndef _POWERNV_OPAL_FADUMP_H +#define _POWERNV_OPAL_FADUMP_H + +#include + +/* + * With kernel & initrd loaded at 512MB (with 256MB size), enforce a minimum + * boot memory size of 768MB to ensure f/w loading kernel and initrd doesn't + * mess with crash'ed kernel's memory during MPIPL. + */ +#define OPAL_FADUMP_MIN_BOOT_MEM (0x30000000UL) + +/* + * OPAL FADump metadata structure format version + * + * OPAL FADump kernel metadata structure stores kernel metadata needed to + * register-for/process crash dump. Format version is used to keep a tab on + * the changes in the structure format. The changes, if any, to the format + * are expected to be minimal and backward compatible. + */ +#define OPAL_FADUMP_VERSION 0x1 + +/* + * OPAL FADump kernel metadata + * + * The address of this structure will be registered with f/w for retrieving + * in the capture kernel to process the crash dump. + */ +struct opal_fadump_mem_struct { + u8 version; + u8 reserved[3]; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; + struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; +} __packed; + +/* + * CPU state data + * + * CPU state data information is provided by f/w. The format for this data + * is defined in the HDAT spec. Version is used to keep a tab on the changes + * in this CPU state data format. Changes to this format are unlikely, but + * if there are any changes, please refer to latest HDAT specification. + */ +#define HDAT_FADUMP_CPU_DATA_VER 1 + +#define HDAT_FADUMP_CORE_INACTIVE (0x0F) + +/* HDAT thread header for register entries */ +struct hdat_fadump_thread_hdr { + __be32 pir; + /* 0x00 - 0x0F - The corresponding stop state of the core */ + u8 core_state; + u8 reserved[3]; + + __be32 offset; /* Offset to Register Entries array */ + __be32 ecnt; /* Number of entries */ + __be32 esize; /* Alloc size of each array entry in bytes */ + __be32 eactsz; /* Actual size of each array entry in bytes */ +} __packed; + +/* Register types populated by f/w */ +#define HDAT_FADUMP_REG_TYPE_GPR 0x01 +#define HDAT_FADUMP_REG_TYPE_SPR 0x02 + +/* ID numbers used by f/w while populating certain registers */ +#define HDAT_FADUMP_REG_ID_NIP 0x7D0 +#define HDAT_FADUMP_REG_ID_MSR 0x7D1 +#define HDAT_FADUMP_REG_ID_CCR 0x7D2 + +/* HDAT register entry. */ +struct hdat_fadump_reg_entry { + __be32 reg_type; + __be32 reg_num; + __be64 reg_val; +} __packed; + +static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs, + u32 reg_type, u32 reg_num, + u64 reg_val) +{ + if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) { + if (reg_num < 32) + regs->gpr[reg_num] = reg_val; + return; + } + + switch (reg_num) { + case SPRN_CTR: + regs->ctr = reg_val; + break; + case SPRN_LR: + regs->link = reg_val; + break; + case SPRN_XER: + regs->xer = reg_val; + break; + case SPRN_DAR: + regs->dar = reg_val; + break; + case SPRN_DSISR: + regs->dsisr = reg_val; + break; + case HDAT_FADUMP_REG_ID_NIP: + regs->nip = reg_val; + break; + case HDAT_FADUMP_REG_ID_MSR: + regs->msr = reg_val; + break; + case HDAT_FADUMP_REG_ID_CCR: + regs->ccr = reg_val; + break; + } +} + +static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, + unsigned int reg_entry_size, + bool cpu_endian, + struct pt_regs *regs) +{ + struct hdat_fadump_reg_entry *reg_entry; + u64 val; + int i; + + memset(regs, 0, sizeof(struct pt_regs)); + + for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { + reg_entry = (struct hdat_fadump_reg_entry *)bufp; + val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : + (u64)(reg_entry->reg_val)); + opal_fadump_set_regval_regnum(regs, + be32_to_cpu(reg_entry->reg_type), + be32_to_cpu(reg_entry->reg_num), + val); + } +} + +#endif /* _POWERNV_OPAL_FADUMP_H */ diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c new file mode 100644 index 0000000000..d5ea04e8e4 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL Firmware Update Interface + * + * Copyright 2013 IBM Corp. + */ + +#define DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* FLASH status codes */ +#define FLASH_NO_OP -1099 /* No operation initiated by user */ +#define FLASH_NO_AUTH -9002 /* Not a service authority partition */ + +/* Validate image status values */ +#define VALIDATE_IMG_READY -1001 /* Image ready for validation */ +#define VALIDATE_IMG_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */ + +/* Manage image status values */ +#define MANAGE_ACTIVE_ERR -9001 /* Cannot overwrite active img */ + +/* Flash image status values */ +#define FLASH_IMG_READY 0 /* Img ready for flash on reboot */ +#define FLASH_INVALID_IMG -1003 /* Flash image shorter than expected */ +#define FLASH_IMG_NULL_DATA -1004 /* Bad data in sg list entry */ +#define FLASH_IMG_BAD_LEN -1005 /* Bad length in sg list entry */ + +/* Manage operation tokens */ +#define FLASH_REJECT_TMP_SIDE 0 /* Reject temporary fw image */ +#define FLASH_COMMIT_TMP_SIDE 1 /* Commit temporary fw image */ + +/* Update tokens */ +#define FLASH_UPDATE_CANCEL 0 /* Cancel update request */ +#define FLASH_UPDATE_INIT 1 /* Initiate update */ + +/* Validate image update result tokens */ +#define VALIDATE_TMP_UPDATE 0 /* T side will be updated */ +#define VALIDATE_FLASH_AUTH 1 /* Partition does not have authority */ +#define VALIDATE_INVALID_IMG 2 /* Candidate image is not valid */ +#define VALIDATE_CUR_UNKNOWN 3 /* Current fixpack level is unknown */ +/* + * Current T side will be committed to P side before being replace with new + * image, and the new image is downlevel from current image + */ +#define VALIDATE_TMP_COMMIT_DL 4 +/* + * Current T side will be committed to P side before being replaced with new + * image + */ +#define VALIDATE_TMP_COMMIT 5 +/* + * T side will be updated with a downlevel image + */ +#define VALIDATE_TMP_UPDATE_DL 6 +/* + * The candidate image's release date is later than the system's firmware + * service entitlement date - service warranty period has expired + */ +#define VALIDATE_OUT_OF_WRNTY 7 + +/* Validate buffer size */ +#define VALIDATE_BUF_SIZE 4096 + +/* XXX: Assume candidate image size is <= 1GB */ +#define MAX_IMAGE_SIZE 0x40000000 + +/* Image status */ +enum { + IMAGE_INVALID, + IMAGE_LOADING, + IMAGE_READY, +}; + +/* Candidate image data */ +struct image_data_t { + int status; + void *data; + uint32_t size; +}; + +/* Candidate image header */ +struct image_header_t { + uint16_t magic; + uint16_t version; + uint32_t size; +}; + +struct validate_flash_t { + int status; /* Return status */ + void *buf; /* Candidate image buffer */ + uint32_t buf_size; /* Image size */ + uint32_t result; /* Update results token */ +}; + +struct manage_flash_t { + int status; /* Return status */ +}; + +struct update_flash_t { + int status; /* Return status */ +}; + +static struct image_header_t image_header; +static struct image_data_t image_data; +static struct validate_flash_t validate_flash_data; +static struct manage_flash_t manage_flash_data; + +/* Initialize update_flash_data status to No Operation */ +static struct update_flash_t update_flash_data = { + .status = FLASH_NO_OP, +}; + +static DEFINE_MUTEX(image_data_mutex); + +/* + * Validate candidate image + */ +static inline void opal_flash_validate(void) +{ + long ret; + void *buf = validate_flash_data.buf; + __be32 size = cpu_to_be32(validate_flash_data.buf_size); + __be32 result; + + ret = opal_validate_flash(__pa(buf), &size, &result); + + validate_flash_data.status = ret; + validate_flash_data.buf_size = be32_to_cpu(size); + validate_flash_data.result = be32_to_cpu(result); +} + +/* + * Validate output format: + * validate result token + * current image version details + * new image version details + */ +static ssize_t validate_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + int len; + + /* Candidate image is not validated */ + if (args_buf->status < VALIDATE_TMP_UPDATE) { + len = sprintf(buf, "%d\n", args_buf->status); + goto out; + } + + /* Result token */ + len = sprintf(buf, "%d\n", args_buf->result); + + /* Current and candidate image version details */ + if ((args_buf->result != VALIDATE_TMP_UPDATE) && + (args_buf->result < VALIDATE_CUR_UNKNOWN)) + goto out; + + if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) { + memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len); + len = VALIDATE_BUF_SIZE; + } else { + memcpy(buf + len, args_buf->buf, args_buf->buf_size); + len += args_buf->buf_size; + } +out: + /* Set status to default */ + args_buf->status = FLASH_NO_OP; + return len; +} + +/* + * Validate candidate firmware image + * + * Note: + * We are only interested in first 4K bytes of the + * candidate image. + */ +static ssize_t validate_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + + if (buf[0] != '1') + return -EINVAL; + + mutex_lock(&image_data_mutex); + + if (image_data.status != IMAGE_READY || + image_data.size < VALIDATE_BUF_SIZE) { + args_buf->result = VALIDATE_INVALID_IMG; + args_buf->status = VALIDATE_IMG_INCOMPLETE; + goto out; + } + + /* Copy first 4k bytes of candidate image */ + memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE); + + args_buf->status = VALIDATE_IMG_READY; + args_buf->buf_size = VALIDATE_BUF_SIZE; + + /* Validate candidate image */ + opal_flash_validate(); + +out: + mutex_unlock(&image_data_mutex); + return count; +} + +/* + * Manage flash routine + */ +static inline void opal_flash_manage(uint8_t op) +{ + struct manage_flash_t *const args_buf = &manage_flash_data; + + args_buf->status = opal_manage_flash(op); +} + +/* + * Show manage flash status + */ +static ssize_t manage_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct manage_flash_t *const args_buf = &manage_flash_data; + int rc; + + rc = sprintf(buf, "%d\n", args_buf->status); + /* Set status to default*/ + args_buf->status = FLASH_NO_OP; + return rc; +} + +/* + * Manage operations: + * 0 - Reject + * 1 - Commit + */ +static ssize_t manage_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + uint8_t op; + switch (buf[0]) { + case '0': + op = FLASH_REJECT_TMP_SIDE; + break; + case '1': + op = FLASH_COMMIT_TMP_SIDE; + break; + default: + return -EINVAL; + } + + /* commit/reject temporary image */ + opal_flash_manage(op); + return count; +} + +/* + * OPAL update flash + */ +static int opal_flash_update(int op) +{ + struct opal_sg_list *list; + unsigned long addr; + int64_t rc = OPAL_PARAMETER; + + if (op == FLASH_UPDATE_CANCEL) { + pr_alert("FLASH: Image update cancelled\n"); + addr = '\0'; + goto flash; + } + + list = opal_vmalloc_to_sg_list(image_data.data, image_data.size); + if (!list) + goto invalid_img; + + /* First entry address */ + addr = __pa(list); + +flash: + rc = opal_update_flash(addr); + +invalid_img: + return rc; +} + +/* This gets called just before system reboots */ +void opal_flash_update_print_message(void) +{ + if (update_flash_data.status != FLASH_IMG_READY) + return; + + pr_alert("FLASH: Flashing new firmware\n"); + pr_alert("FLASH: Image is %u bytes\n", image_data.size); + pr_alert("FLASH: Performing flash and reboot/shutdown\n"); + pr_alert("FLASH: This will take several minutes. Do not power off!\n"); + + /* Small delay to help getting the above message out */ + msleep(500); +} + +/* + * Show candidate image status + */ +static ssize_t update_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct update_flash_t *const args_buf = &update_flash_data; + return sprintf(buf, "%d\n", args_buf->status); +} + +/* + * Set update image flag + * 1 - Flash new image + * 0 - Cancel flash request + */ +static ssize_t update_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct update_flash_t *const args_buf = &update_flash_data; + int rc = count; + + mutex_lock(&image_data_mutex); + + switch (buf[0]) { + case '0': + if (args_buf->status == FLASH_IMG_READY) + opal_flash_update(FLASH_UPDATE_CANCEL); + args_buf->status = FLASH_NO_OP; + break; + case '1': + /* Image is loaded? */ + if (image_data.status == IMAGE_READY) + args_buf->status = + opal_flash_update(FLASH_UPDATE_INIT); + else + args_buf->status = FLASH_INVALID_IMG; + break; + default: + rc = -EINVAL; + } + + mutex_unlock(&image_data_mutex); + return rc; +} + +/* + * Free image buffer + */ +static void free_image_buf(void) +{ + void *addr; + int size; + + addr = image_data.data; + size = PAGE_ALIGN(image_data.size); + while (size > 0) { + ClearPageReserved(vmalloc_to_page(addr)); + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + vfree(image_data.data); + image_data.data = NULL; + image_data.status = IMAGE_INVALID; +} + +/* + * Allocate image buffer. + */ +static int alloc_image_buf(char *buffer, size_t count) +{ + void *addr; + int size; + + if (count < sizeof(image_header)) { + pr_warn("FLASH: Invalid candidate image\n"); + return -EINVAL; + } + + memcpy(&image_header, (void *)buffer, sizeof(image_header)); + image_data.size = be32_to_cpu(image_header.size); + pr_debug("FLASH: Candidate image size = %u\n", image_data.size); + + if (image_data.size > MAX_IMAGE_SIZE) { + pr_warn("FLASH: Too large image\n"); + return -EINVAL; + } + if (image_data.size < VALIDATE_BUF_SIZE) { + pr_warn("FLASH: Image is shorter than expected\n"); + return -EINVAL; + } + + image_data.data = vzalloc(PAGE_ALIGN(image_data.size)); + if (!image_data.data) { + pr_err("%s : Failed to allocate memory\n", __func__); + return -ENOMEM; + } + + /* Pin memory */ + addr = image_data.data; + size = PAGE_ALIGN(image_data.size); + while (size > 0) { + SetPageReserved(vmalloc_to_page(addr)); + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + image_data.status = IMAGE_LOADING; + return 0; +} + +/* + * Copy candidate image + * + * Parse candidate image header to get total image size + * and pre-allocate required memory. + */ +static ssize_t image_data_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + int rc; + + mutex_lock(&image_data_mutex); + + /* New image ? */ + if (pos == 0) { + /* Free memory, if already allocated */ + if (image_data.data) + free_image_buf(); + + /* Cancel outstanding image update request */ + if (update_flash_data.status == FLASH_IMG_READY) + opal_flash_update(FLASH_UPDATE_CANCEL); + + /* Allocate memory */ + rc = alloc_image_buf(buffer, count); + if (rc) + goto out; + } + + if (image_data.status != IMAGE_LOADING) { + rc = -ENOMEM; + goto out; + } + + if ((pos + count) > image_data.size) { + rc = -EINVAL; + goto out; + } + + memcpy(image_data.data + pos, (void *)buffer, count); + rc = count; + + /* Set image status */ + if ((pos + count) == image_data.size) { + pr_debug("FLASH: Candidate image loaded....\n"); + image_data.status = IMAGE_READY; + } + +out: + mutex_unlock(&image_data_mutex); + return rc; +} + +/* + * sysfs interface : + * OPAL uses below sysfs files for code update. + * We create these files under /sys/firmware/opal. + * + * image : Interface to load candidate firmware image + * validate_flash : Validate firmware image + * manage_flash : Commit/Reject firmware image + * update_flash : Flash new firmware image + * + */ +static const struct bin_attribute image_data_attr = { + .attr = {.name = "image", .mode = 0200}, + .size = MAX_IMAGE_SIZE, /* Limit image size */ + .write = image_data_write, +}; + +static struct kobj_attribute validate_attribute = + __ATTR(validate_flash, 0600, validate_show, validate_store); + +static struct kobj_attribute manage_attribute = + __ATTR(manage_flash, 0600, manage_show, manage_store); + +static struct kobj_attribute update_attribute = + __ATTR(update_flash, 0600, update_show, update_store); + +static struct attribute *image_op_attrs[] = { + &validate_attribute.attr, + &manage_attribute.attr, + &update_attribute.attr, + NULL /* need to NULL terminate the list of attributes */ +}; + +static const struct attribute_group image_op_attr_group = { + .attrs = image_op_attrs, +}; + +void __init opal_flash_update_init(void) +{ + int ret; + + /* Firmware update is not supported by firmware */ + if (!opal_check_token(OPAL_FLASH_VALIDATE)) + return; + + /* Allocate validate image buffer */ + validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL); + if (!validate_flash_data.buf) { + pr_err("%s : Failed to allocate memory\n", __func__); + return; + } + + /* Make sure /sys/firmware/opal directory is created */ + if (!opal_kobj) { + pr_warn("FLASH: opal kobject is not available\n"); + goto nokobj; + } + + /* Create the sysfs files */ + ret = sysfs_create_group(opal_kobj, &image_op_attr_group); + if (ret) { + pr_warn("FLASH: Failed to create sysfs files\n"); + goto nokobj; + } + + ret = sysfs_create_bin_file(opal_kobj, &image_data_attr); + if (ret) { + pr_warn("FLASH: Failed to create sysfs files\n"); + goto nosysfs_file; + } + + /* Set default status */ + validate_flash_data.status = FLASH_NO_OP; + manage_flash_data.status = FLASH_NO_OP; + update_flash_data.status = FLASH_NO_OP; + image_data.status = IMAGE_INVALID; + return; + +nosysfs_file: + sysfs_remove_group(opal_kobj, &image_op_attr_group); + +nokobj: + kfree(validate_flash_data.buf); + return; +} diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c new file mode 100644 index 0000000000..f0c1830deb --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -0,0 +1,381 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OPAL hypervisor Maintenance interrupt handling support in PowerNV. + * + * Copyright 2014 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "powernv.h" + +static int opal_hmi_handler_nb_init; +struct OpalHmiEvtNode { + struct list_head list; + struct OpalHMIEvent hmi_evt; +}; + +struct xstop_reason { + uint32_t xstop_reason; + const char *unit_failed; + const char *description; +}; + +static LIST_HEAD(opal_hmi_evt_list); +static DEFINE_SPINLOCK(opal_hmi_evt_lock); + +static void print_core_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + int i; + static const struct xstop_reason xstop_reason[] = { + { CORE_CHECKSTOP_IFU_REGFILE, "IFU", + "RegFile core check stop" }, + { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_DURING_RECOV, "PC", + "Core checkstop during recovery" }, + { CORE_CHECKSTOP_ISU_REGFILE, "ISU", + "RegFile core check stop (mapper error)" }, + { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" }, + { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" }, + { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC", + "Recovery in maintenance mode" }, + { CORE_CHECKSTOP_LSU_REGFILE, "LSU", + "RegFile core check stop" }, + { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC", + "Forward Progress Error" }, + { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" }, + { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC", + "Hypervisor Resource error - core check stop" }, + { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC", + "Hang Recovery Failed (core check stop)" }, + { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC", + "Ambiguous Hang Detected (unknown source)" }, + { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC", + "Debug Trigger Error inject" }, + { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC", + "Hypervisor check stop via SPRC/SPRD" }, + }; + + /* Validity check */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s Unknown Core check stop.\n", level); + return; + } + + printk("%s CPU PIR: %08x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.pir)); + for (i = 0; i < ARRAY_SIZE(xstop_reason); i++) + if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) & + xstop_reason[i].xstop_reason) + printk("%s [Unit: %-3s] %s\n", level, + xstop_reason[i].unit_failed, + xstop_reason[i].description); +} + +static void print_nx_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + int i; + static const struct xstop_reason xstop_reason[] = { + { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine", + "SHM invalid state error" }, + { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine", + "DMA invalid state error bit 15" }, + { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine", + "DMA invalid state error bit 16" }, + { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine", + "Channel 0 invalid state error" }, + { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine", + "Channel 1 invalid state error" }, + { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine", + "Channel 2 invalid state error" }, + { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine", + "Channel 3 invalid state error" }, + { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine", + "Channel 4 invalid state error" }, + { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine", + "Channel 5 invalid state error" }, + { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine", + "Channel 6 invalid state error" }, + { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine", + "Channel 7 invalid state error" }, + { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine", + "UE error on CRB(CSB address, CCB)" }, + { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine", + "SUE error on CRB(CSB address, CCB)" }, + { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface", + "CRB Kill ISN received while holding ISN with UE error" }, + }; + + /* Validity check */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s Unknown NX check stop.\n", level); + return; + } + + printk("%s NX checkstop on CHIP ID: %x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id)); + for (i = 0; i < ARRAY_SIZE(xstop_reason); i++) + if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) & + xstop_reason[i].xstop_reason) + printk("%s [Unit: %-3s] %s\n", level, + xstop_reason[i].unit_failed, + xstop_reason[i].description); +} + +static void print_npu_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + uint8_t reason, reason_count, i; + + /* + * We may not have a checkstop reason on some combination of + * hardware and/or skiboot version + */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s NPU checkstop on chip %x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id)); + return; + } + + /* + * NPU2 has 3 FIRs. Reason encoded on a byte as: + * 2 bits for the FIR number + * 6 bits for the bit number + * It may be possible to find several reasons. + * + * We don't display a specific message per FIR bit as there + * are too many and most are meaningless without the workbook + * and/or hw team help anyway. + */ + reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) / + sizeof(reason); + for (i = 0; i < reason_count; i++) { + reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF; + if (reason) + printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n", + level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id), + reason >> 6, reason & 0x3F); + } +} + +static void print_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + uint8_t type = hmi_evt->u.xstop_error.xstop_type; + switch (type) { + case CHECKSTOP_TYPE_CORE: + print_core_checkstop_reason(level, hmi_evt); + break; + case CHECKSTOP_TYPE_NX: + print_nx_checkstop_reason(level, hmi_evt); + break; + case CHECKSTOP_TYPE_NPU: + print_npu_checkstop_reason(level, hmi_evt); + break; + default: + printk("%s Unknown Malfunction Alert of type %d\n", + level, type); + break; + } +} + +static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt) +{ + const char *level, *sevstr, *error_info; + static const char *hmi_error_types[] = { + "Malfunction Alert", + "Processor Recovery done", + "Processor recovery occurred again", + "Processor recovery occurred for masked error", + "Timer facility experienced an error", + "TFMR SPR is corrupted", + "UPS (Uninterrupted Power System) Overflow indication", + "An XSCOM operation failure", + "An XSCOM operation completed", + "SCOM has set a reserved FIR bit to cause recovery", + "Debug trigger has set a reserved FIR bit to cause recovery", + "A hypervisor resource error occurred", + "CAPP recovery process is in progress", + }; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + /* Print things out */ + if (hmi_evt->version < OpalHMIEvt_V1) { + pr_err("HMI Interrupt, Unknown event version %d !\n", + hmi_evt->version); + return; + } + switch (hmi_evt->severity) { + case OpalHMI_SEV_NO_ERROR: + level = KERN_INFO; + sevstr = "Harmless"; + break; + case OpalHMI_SEV_WARNING: + level = KERN_WARNING; + sevstr = ""; + break; + case OpalHMI_SEV_ERROR_SYNC: + level = KERN_ERR; + sevstr = "Severe"; + break; + case OpalHMI_SEV_FATAL: + default: + level = KERN_ERR; + sevstr = "Fatal"; + break; + } + + if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) { + printk("%s%s Hypervisor Maintenance interrupt [%s]\n", + level, sevstr, + hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ? + hmi_error_types[hmi_evt->type] + : "Unknown"; + printk("%s Error detail: %s\n", level, error_info); + printk("%s HMER: %016llx\n", level, + be64_to_cpu(hmi_evt->hmer)); + if ((hmi_evt->type == OpalHMI_ERROR_TFAC) || + (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)) + printk("%s TFMR: %016llx\n", level, + be64_to_cpu(hmi_evt->tfmr)); + } + + if (hmi_evt->version < OpalHMIEvt_V2) + return; + + /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */ + if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT) + print_checkstop_reason(level, hmi_evt); +} + +static void hmi_event_handler(struct work_struct *work) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct OpalHmiEvtNode *msg_node; + uint8_t disposition; + struct opal_msg msg; + int unrecoverable = 0; + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + while (!list_empty(&opal_hmi_evt_list)) { + msg_node = list_entry(opal_hmi_evt_list.next, + struct OpalHmiEvtNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt; + print_hmi_event_info(hmi_evt); + disposition = hmi_evt->disposition; + kfree(msg_node); + + /* + * Check if HMI event has been recovered or not. If not + * then kernel can't continue, we need to panic. + * But before we do that, display all the HMI event + * available on the list and set unrecoverable flag to 1. + */ + if (disposition != OpalHMI_DISPOSITION_RECOVERED) + unrecoverable = 1; + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + } + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + if (unrecoverable) { + /* Pull all HMI events from OPAL before we panic. */ + while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) { + u32 type; + + type = be32_to_cpu(msg.msg_type); + + /* skip if not HMI event */ + if (type != OPAL_MSG_HMI_EVT) + continue; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&msg.params[0]; + print_hmi_event_info(hmi_evt); + } + + pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception"); + } +} + +static DECLARE_WORK(hmi_event_work, hmi_event_handler); +/* + * opal_handle_hmi_event - notifier handler that queues up HMI events + * to be preocessed later. + */ +static int opal_handle_hmi_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalHMIEvent *hmi_evt; + struct opal_msg *hmi_msg = msg; + struct OpalHmiEvtNode *msg_node; + + /* Sanity Checks */ + if (msg_type != OPAL_MSG_HMI_EVT) + return 0; + + /* HMI event info starts from param[0] */ + hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0]; + + /* Delay the logging of HMI events to workqueue. */ + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("HMI: out of memory, Opal message event not handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt)); + + spin_lock_irqsave(&opal_hmi_evt_lock, flags); + list_add(&msg_node->list, &opal_hmi_evt_list); + spin_unlock_irqrestore(&opal_hmi_evt_lock, flags); + + schedule_work(&hmi_event_work); + return 0; +} + +static struct notifier_block opal_hmi_handler_nb = { + .notifier_call = opal_handle_hmi_event, + .next = NULL, + .priority = 0, +}; + +int __init opal_hmi_handler_init(void) +{ + int ret; + + if (!opal_hmi_handler_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_HMI_EVT, &opal_hmi_handler_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_hmi_handler_nb_init = 1; + } + return 0; +} diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c new file mode 100644 index 0000000000..828fc4d884 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OPAL IMC interface detection driver + * Supported on POWERNV platform + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct dentry *imc_debugfs_parent; + +/* Helpers to export imc command and mode via debugfs */ +static int imc_mem_get(void *data, u64 *val) +{ + *val = cpu_to_be64(*(u64 *)data); + return 0; +} + +static int imc_mem_set(void *data, u64 val) +{ + *(u64 *)data = cpu_to_be64(val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); + +static void imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + debugfs_create_file_unsafe(name, mode, parent, value, &fops_imc_x64); +} + +/* + * export_imc_mode_and_cmd: Create a debugfs interface + * for imc_cmd and imc_mode + * for each node in the system. + * imc_mode and imc_cmd can be changed by echo into + * this interface. + */ +static void export_imc_mode_and_cmd(struct device_node *node, + struct imc_pmu *pmu_ptr) +{ + static u64 loc, *imc_mode_addr, *imc_cmd_addr; + char mode[16], cmd[16]; + u32 cb_offset; + struct imc_mem_info *ptr = pmu_ptr->mem_info; + + imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir); + + if (of_property_read_u32(node, "cb_offset", &cb_offset)) + cb_offset = IMC_CNTL_BLK_OFFSET; + + while (ptr->vbase != NULL) { + loc = (u64)(ptr->vbase) + cb_offset; + imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); + sprintf(mode, "imc_mode_%d", (u32)(ptr->id)); + imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr); + + imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); + sprintf(cmd, "imc_cmd_%d", (u32)(ptr->id)); + imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr); + ptr++; + } +} + +/* + * imc_get_mem_addr_nest: Function to get nest counter memory region + * for each chip + */ +static int imc_get_mem_addr_nest(struct device_node *node, + struct imc_pmu *pmu_ptr, + u32 offset) +{ + int nr_chips = 0, i; + u64 *base_addr_arr, baddr; + u32 *chipid_arr; + + nr_chips = of_property_count_u32_elems(node, "chip-id"); + if (nr_chips <= 0) + return -ENODEV; + + base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL); + if (!base_addr_arr) + return -ENOMEM; + + chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL); + if (!chipid_arr) { + kfree(base_addr_arr); + return -ENOMEM; + } + + if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips)) + goto error; + + if (of_property_read_u64_array(node, "base-addr", base_addr_arr, + nr_chips)) + goto error; + + pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info), + GFP_KERNEL); + if (!pmu_ptr->mem_info) + goto error; + + for (i = 0; i < nr_chips; i++) { + pmu_ptr->mem_info[i].id = chipid_arr[i]; + baddr = base_addr_arr[i] + offset; + pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr); + } + + pmu_ptr->imc_counter_mmaped = true; + kfree(base_addr_arr); + kfree(chipid_arr); + return 0; + +error: + kfree(base_addr_arr); + kfree(chipid_arr); + return -1; +} + +/* + * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index + * and domain as the inputs. + * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets + */ +static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain) +{ + int ret = 0; + struct imc_pmu *pmu_ptr; + u32 offset; + + /* Return for unknown domain */ + if (domain < 0) + return NULL; + + /* memory for pmu */ + pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); + if (!pmu_ptr) + return NULL; + + /* Set the domain */ + pmu_ptr->domain = domain; + + ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size); + if (ret) + goto free_pmu; + + if (!of_property_read_u32(parent, "offset", &offset)) { + if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) + goto free_pmu; + } + + /* Function to register IMC pmu */ + ret = init_imc_pmu(parent, pmu_ptr, pmu_index); + if (ret) { + pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name); + kfree(pmu_ptr->pmu.name); + if (pmu_ptr->domain == IMC_DOMAIN_NEST) + kfree(pmu_ptr->mem_info); + kfree(pmu_ptr); + return NULL; + } + + return pmu_ptr; + +free_pmu: + kfree(pmu_ptr); + return NULL; +} + +static void disable_nest_pmu_counters(void) +{ + int nid, cpu; + const struct cpumask *l_cpumask; + + cpus_read_lock(); + for_each_node_with_cpus(nid) { + l_cpumask = cpumask_of_node(nid); + cpu = cpumask_first_and(l_cpumask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + continue; + opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(cpu)); + } + cpus_read_unlock(); +} + +static void disable_core_pmu_counters(void) +{ + int cpu, rc; + + cpus_read_lock(); + /* Disable the IMC Core functions */ + for_each_online_cpu(cpu) { + if (cpu_first_thread_sibling(cpu) != cpu) + continue; + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(cpu)); + if (rc) + pr_err("%s: Failed to stop Core (cpu = %d)\n", + __func__, cpu); + } + cpus_read_unlock(); +} + +int get_max_nest_dev(void) +{ + struct device_node *node; + u32 pmu_units = 0, type; + + for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) { + if (of_property_read_u32(node, "type", &type)) + continue; + + if (type == IMC_TYPE_CHIP) + pmu_units++; + } + + return pmu_units; +} + +static int opal_imc_counters_probe(struct platform_device *pdev) +{ + struct device_node *imc_dev = pdev->dev.of_node; + struct imc_pmu *pmu; + int pmu_count = 0, domain; + bool core_imc_reg = false, thread_imc_reg = false; + u32 type; + + /* + * Check whether this is kdump kernel. If yes, force the engines to + * stop and return. + */ + if (is_kdump_kernel()) { + disable_nest_pmu_counters(); + disable_core_pmu_counters(); + return -ENODEV; + } + + for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) { + pmu = NULL; + if (of_property_read_u32(imc_dev, "type", &type)) { + pr_warn("IMC Device without type property\n"); + continue; + } + + switch (type) { + case IMC_TYPE_CHIP: + domain = IMC_DOMAIN_NEST; + break; + case IMC_TYPE_CORE: + domain =IMC_DOMAIN_CORE; + break; + case IMC_TYPE_THREAD: + domain = IMC_DOMAIN_THREAD; + break; + case IMC_TYPE_TRACE: + domain = IMC_DOMAIN_TRACE; + break; + default: + pr_warn("IMC Unknown Device type \n"); + domain = -1; + break; + } + + pmu = imc_pmu_create(imc_dev, pmu_count, domain); + if (pmu != NULL) { + if (domain == IMC_DOMAIN_NEST) { + if (!imc_debugfs_parent) + export_imc_mode_and_cmd(imc_dev, pmu); + pmu_count++; + } + if (domain == IMC_DOMAIN_CORE) + core_imc_reg = true; + if (domain == IMC_DOMAIN_THREAD) + thread_imc_reg = true; + } + } + + /* If core imc is not registered, unregister thread-imc */ + if (!core_imc_reg && thread_imc_reg) + unregister_thread_imc(); + + return 0; +} + +static void opal_imc_counters_shutdown(struct platform_device *pdev) +{ + /* + * Function only stops the engines which is bare minimum. + * TODO: Need to handle proper memory cleanup and pmu + * unregister. + */ + disable_nest_pmu_counters(); + disable_core_pmu_counters(); +} + +static const struct of_device_id opal_imc_match[] = { + { .compatible = IMC_DTB_COMPAT }, + {}, +}; + +static struct platform_driver opal_imc_driver = { + .driver = { + .name = "opal-imc-counters", + .of_match_table = opal_imc_match, + }, + .probe = opal_imc_counters_probe, + .shutdown = opal_imc_counters_shutdown, +}; + +builtin_platform_driver(opal_imc_driver); diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c new file mode 100644 index 0000000000..56a1f7ce78 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file implements an irqchip for OPAL events. Whenever there is + * an interrupt that is handled by OPAL we get passed a list of events + * that Linux needs to do something about. These basically look like + * interrupts to Linux so we implement an irqchip to handle them. + * + * Copyright Alistair Popple, IBM Corporation 2014. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "powernv.h" + +/* Maximum number of events supported by OPAL firmware */ +#define MAX_NUM_EVENTS 64 + +struct opal_event_irqchip { + struct irq_chip irqchip; + struct irq_domain *domain; + unsigned long mask; +}; +static struct opal_event_irqchip opal_event_irqchip; +static u64 last_outstanding_events; +static int opal_irq_count; +static struct resource *opal_irqs; + +void opal_handle_events(void) +{ + __be64 events = 0; + u64 e; + + e = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask; +again: + while (e) { + int hwirq; + + hwirq = fls64(e) - 1; + e &= ~BIT_ULL(hwirq); + + local_irq_disable(); + irq_enter(); + generic_handle_domain_irq(opal_event_irqchip.domain, hwirq); + irq_exit(); + local_irq_enable(); + + cond_resched(); + } + WRITE_ONCE(last_outstanding_events, 0); + if (opal_poll_events(&events) != OPAL_SUCCESS) + return; + e = be64_to_cpu(events) & opal_event_irqchip.mask; + if (e) + goto again; +} + +bool opal_have_pending_events(void) +{ + if (READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask) + return true; + return false; +} + +static void opal_event_mask(struct irq_data *d) +{ + clear_bit(d->hwirq, &opal_event_irqchip.mask); +} + +static void opal_event_unmask(struct irq_data *d) +{ + set_bit(d->hwirq, &opal_event_irqchip.mask); + if (opal_have_pending_events()) + opal_wake_poller(); +} + +static int opal_event_set_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * For now we only support level triggered events. The irq + * handler will be called continuously until the event has + * been cleared in OPAL. + */ + if (flow_type != IRQ_TYPE_LEVEL_HIGH) + return -EINVAL; + + return 0; +} + +static struct opal_event_irqchip opal_event_irqchip = { + .irqchip = { + .name = "OPAL EVT", + .irq_mask = opal_event_mask, + .irq_unmask = opal_event_unmask, + .irq_set_type = opal_event_set_type, + }, + .mask = 0, +}; + +static int opal_event_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_chip_data(irq, &opal_event_irqchip); + irq_set_chip_and_handler(irq, &opal_event_irqchip.irqchip, + handle_level_irq); + + return 0; +} + +static irqreturn_t opal_interrupt(int irq, void *data) +{ + __be64 events; + + opal_handle_interrupt(virq_to_hw(irq), &events); + WRITE_ONCE(last_outstanding_events, be64_to_cpu(events)); + if (opal_have_pending_events()) + opal_wake_poller(); + + return IRQ_HANDLED; +} + +static int opal_event_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return irq_domain_get_of_node(h) == node; +} + +static int opal_event_xlate(struct irq_domain *h, struct device_node *np, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_flags) +{ + *out_hwirq = intspec[0]; + *out_flags = IRQ_TYPE_LEVEL_HIGH; + + return 0; +} + +static const struct irq_domain_ops opal_event_domain_ops = { + .match = opal_event_match, + .map = opal_event_map, + .xlate = opal_event_xlate, +}; + +void opal_event_shutdown(void) +{ + unsigned int i; + + /* First free interrupts, which will also mask them */ + for (i = 0; i < opal_irq_count; i++) { + if (!opal_irqs || !opal_irqs[i].start) + continue; + + if (in_interrupt() || irqs_disabled()) + disable_irq_nosync(opal_irqs[i].start); + else + free_irq(opal_irqs[i].start, NULL); + + opal_irqs[i].start = 0; + } +} + +int __init opal_event_init(void) +{ + struct device_node *dn, *opal_node; + bool old_style = false; + int i, rc = 0; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("opal: Node not found\n"); + return -ENODEV; + } + + /* If dn is NULL it means the domain won't be linked to a DT + * node so therefore irq_of_parse_and_map(...) wont work. But + * that shouldn't be problem because if we're running a + * version of skiboot that doesn't have the dn then the + * devices won't have the correct properties and will have to + * fall back to the legacy method (opal_event_request(...)) + * anyway. */ + dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event"); + opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS, + &opal_event_domain_ops, &opal_event_irqchip); + of_node_put(dn); + if (!opal_event_irqchip.domain) { + pr_warn("opal: Unable to create irq domain\n"); + rc = -ENOMEM; + goto out; + } + + /* Look for new-style (standard) "interrupts" property */ + opal_irq_count = of_irq_count(opal_node); + + /* Absent ? Look for the old one */ + if (opal_irq_count < 1) { + /* Get opal-interrupts property and names if present */ + rc = of_property_count_u32_elems(opal_node, "opal-interrupts"); + if (rc > 0) + opal_irq_count = rc; + old_style = true; + } + + /* No interrupts ? Bail out */ + if (!opal_irq_count) + goto out; + + pr_debug("OPAL: Found %d interrupts reserved for OPAL using %s scheme\n", + opal_irq_count, old_style ? "old" : "new"); + + /* Allocate an IRQ resources array */ + opal_irqs = kcalloc(opal_irq_count, sizeof(struct resource), GFP_KERNEL); + if (WARN_ON(!opal_irqs)) { + rc = -ENOMEM; + goto out; + } + + /* Build the resources array */ + if (old_style) { + /* Old style "opal-interrupts" property */ + for (i = 0; i < opal_irq_count; i++) { + struct resource *r = &opal_irqs[i]; + const char *name = NULL; + u32 hw_irq; + int virq; + + rc = of_property_read_u32_index(opal_node, "opal-interrupts", + i, &hw_irq); + if (WARN_ON(rc < 0)) { + opal_irq_count = i; + break; + } + of_property_read_string_index(opal_node, "opal-interrupts-names", + i, &name); + virq = irq_create_mapping(NULL, hw_irq); + if (!virq) { + pr_warn("Failed to map OPAL irq 0x%x\n", hw_irq); + continue; + } + r->start = r->end = virq; + r->flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW; + r->name = name; + } + } else { + /* new style standard "interrupts" property */ + rc = of_irq_to_resource_table(opal_node, opal_irqs, opal_irq_count); + if (WARN_ON(rc < 0)) { + opal_irq_count = 0; + kfree(opal_irqs); + goto out; + } + if (WARN_ON(rc < opal_irq_count)) + opal_irq_count = rc; + } + + /* Install interrupt handlers */ + for (i = 0; i < opal_irq_count; i++) { + struct resource *r = &opal_irqs[i]; + const char *name; + + /* Prefix name */ + if (r->name && strlen(r->name)) + name = kasprintf(GFP_KERNEL, "opal-%s", r->name); + else + name = kasprintf(GFP_KERNEL, "opal"); + + if (!name) + continue; + /* Install interrupt handler */ + rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK, + name, NULL); + if (rc) { + pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start); + continue; + } + } + rc = 0; + out: + of_node_put(opal_node); + return rc; +} +machine_arch_initcall(powernv, opal_event_init); + +/** + * opal_event_request(unsigned int opal_event_nr) - Request an event + * @opal_event_nr: the opal event number to request + * + * This routine can be used to find the linux virq number which can + * then be passed to request_irq to assign a handler for a particular + * opal event. This should only be used by legacy devices which don't + * have proper device tree bindings. Most devices should use + * irq_of_parse_and_map() instead. + */ +int opal_event_request(unsigned int opal_event_nr) +{ + if (WARN_ON_ONCE(!opal_event_irqchip.domain)) + return 0; + + return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); +} +EXPORT_SYMBOL(opal_event_request); diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c new file mode 100644 index 0000000000..6c3bc4b4da --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * kmsg dumper that ensures the OPAL console fully flushes panic messages + * + * Author: Russell Currey + * + * Copyright 2015 IBM Corporation. + */ + +#include + +#include +#include + +/* + * Console output is controlled by OPAL firmware. The kernel regularly calls + * OPAL_POLL_EVENTS, which flushes some console output. In a panic state, + * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message + * may not be completely printed. This function does not actually dump the + * message, it just ensures that OPAL completely flushes the console buffer. + */ +static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + /* + * Outside of a panic context the pollers will continue to run, + * so we don't need to do any special flushing. + */ + if (reason != KMSG_DUMP_PANIC) + return; + + opal_flush_console(0); +} + +static struct kmsg_dumper opal_kmsg_dumper = { + .dump = kmsg_dump_opal_console_flush +}; + +void __init opal_kmsg_init(void) +{ + int rc; + + /* Add our dumper to the list */ + rc = kmsg_dump_register(&opal_kmsg_dumper); + if (rc != 0) + pr_err("opal: kmsg_dump_register failed; returned %d\n", rc); +} diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c new file mode 100644 index 0000000000..a16f07cdab --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV LPC bus handling. + * + * Copyright 2013 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static int opal_lpc_chip_id = -1; + +static u8 opal_lpc_inb(unsigned long port) +{ + int64_t rc; + __be32 data; + + if (opal_lpc_chip_id < 0 || port > 0xffff) + return 0xff; + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1); + return rc ? 0xff : be32_to_cpu(data); +} + +static __le16 __opal_lpc_inw(unsigned long port) +{ + int64_t rc; + __be32 data; + + if (opal_lpc_chip_id < 0 || port > 0xfffe) + return 0xffff; + if (port & 1) + return (__le16)opal_lpc_inb(port) << 8 | opal_lpc_inb(port + 1); + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2); + return rc ? 0xffff : be32_to_cpu(data); +} +static u16 opal_lpc_inw(unsigned long port) +{ + return le16_to_cpu(__opal_lpc_inw(port)); +} + +static __le32 __opal_lpc_inl(unsigned long port) +{ + int64_t rc; + __be32 data; + + if (opal_lpc_chip_id < 0 || port > 0xfffc) + return 0xffffffff; + if (port & 3) + return (__le32)opal_lpc_inb(port ) << 24 | + (__le32)opal_lpc_inb(port + 1) << 16 | + (__le32)opal_lpc_inb(port + 2) << 8 | + opal_lpc_inb(port + 3); + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4); + return rc ? 0xffffffff : be32_to_cpu(data); +} + +static u32 opal_lpc_inl(unsigned long port) +{ + return le32_to_cpu(__opal_lpc_inl(port)); +} + +static void opal_lpc_outb(u8 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xffff) + return; + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1); +} + +static void __opal_lpc_outw(__le16 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xfffe) + return; + if (port & 1) { + opal_lpc_outb(val >> 8, port); + opal_lpc_outb(val , port + 1); + return; + } + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2); +} + +static void opal_lpc_outw(u16 val, unsigned long port) +{ + __opal_lpc_outw(cpu_to_le16(val), port); +} + +static void __opal_lpc_outl(__le32 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xfffc) + return; + if (port & 3) { + opal_lpc_outb(val >> 24, port); + opal_lpc_outb(val >> 16, port + 1); + opal_lpc_outb(val >> 8, port + 2); + opal_lpc_outb(val , port + 3); + return; + } + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4); +} + +static void opal_lpc_outl(u32 val, unsigned long port) +{ + __opal_lpc_outl(cpu_to_le32(val), port); +} + +static void opal_lpc_insb(unsigned long p, void *b, unsigned long c) +{ + u8 *ptr = b; + + while(c--) + *(ptr++) = opal_lpc_inb(p); +} + +static void opal_lpc_insw(unsigned long p, void *b, unsigned long c) +{ + __le16 *ptr = b; + + while(c--) + *(ptr++) = __opal_lpc_inw(p); +} + +static void opal_lpc_insl(unsigned long p, void *b, unsigned long c) +{ + __le32 *ptr = b; + + while(c--) + *(ptr++) = __opal_lpc_inl(p); +} + +static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c) +{ + const u8 *ptr = b; + + while(c--) + opal_lpc_outb(*(ptr++), p); +} + +static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c) +{ + const __le16 *ptr = b; + + while(c--) + __opal_lpc_outw(*(ptr++), p); +} + +static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c) +{ + const __le32 *ptr = b; + + while(c--) + __opal_lpc_outl(*(ptr++), p); +} + +static const struct ppc_pci_io opal_lpc_io = { + .inb = opal_lpc_inb, + .inw = opal_lpc_inw, + .inl = opal_lpc_inl, + .outb = opal_lpc_outb, + .outw = opal_lpc_outw, + .outl = opal_lpc_outl, + .insb = opal_lpc_insb, + .insw = opal_lpc_insw, + .insl = opal_lpc_insl, + .outsb = opal_lpc_outsb, + .outsw = opal_lpc_outsw, + .outsl = opal_lpc_outsl, +}; + +#ifdef CONFIG_DEBUG_FS +struct lpc_debugfs_entry { + enum OpalLPCAddressType lpc_type; +}; + +static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct lpc_debugfs_entry *lpc = filp->private_data; + u32 data, pos, len, todo; + int rc; + + if (!access_ok(ubuf, count)) + return -EFAULT; + + todo = count; + while (todo) { + pos = *ppos; + + /* + * Select access size based on count and alignment and + * access type. IO and MEM only support byte accesses, + * FW supports all 3. + */ + len = 1; + if (lpc->lpc_type == OPAL_LPC_FW) { + if (todo > 3 && (pos & 3) == 0) + len = 4; + else if (todo > 1 && (pos & 1) == 0) + len = 2; + } + rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos, + &data, len); + if (rc) + return -ENXIO; + + /* + * Now there is some trickery with the data returned by OPAL + * as it's the desired data right justified in a 32-bit BE + * word. + * + * This is a very bad interface and I'm to blame for it :-( + * + * So we can't just apply a 32-bit swap to what comes from OPAL, + * because user space expects the *bytes* to be in their proper + * respective positions (ie, LPC position). + * + * So what we really want to do here is to shift data right + * appropriately on a LE kernel. + * + * IE. If the LPC transaction has bytes B0, B1, B2 and B3 in that + * order, we have in memory written to by OPAL at the "data" + * pointer: + * + * Bytes: OPAL "data" LE "data" + * 32-bit: B0 B1 B2 B3 B0B1B2B3 B3B2B1B0 + * 16-bit: B0 B1 0000B0B1 B1B00000 + * 8-bit: B0 000000B0 B0000000 + * + * So a BE kernel will have the leftmost of the above in the MSB + * and rightmost in the LSB and can just then "cast" the u32 "data" + * down to the appropriate quantity and write it. + * + * However, an LE kernel can't. It doesn't need to swap because a + * load from data followed by a store to user are going to preserve + * the byte ordering which is the wire byte order which is what the + * user wants, but in order to "crop" to the right size, we need to + * shift right first. + */ + switch(len) { + case 4: + rc = __put_user((u32)data, (u32 __user *)ubuf); + break; + case 2: +#ifdef __LITTLE_ENDIAN__ + data >>= 16; +#endif + rc = __put_user((u16)data, (u16 __user *)ubuf); + break; + default: +#ifdef __LITTLE_ENDIAN__ + data >>= 24; +#endif + rc = __put_user((u8)data, (u8 __user *)ubuf); + break; + } + if (rc) + return -EFAULT; + *ppos += len; + ubuf += len; + todo -= len; + } + + return count; +} + +static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct lpc_debugfs_entry *lpc = filp->private_data; + u32 data, pos, len, todo; + int rc; + + if (!access_ok(ubuf, count)) + return -EFAULT; + + todo = count; + while (todo) { + pos = *ppos; + + /* + * Select access size based on count and alignment and + * access type. IO and MEM only support byte acceses, + * FW supports all 3. + */ + len = 1; + if (lpc->lpc_type == OPAL_LPC_FW) { + if (todo > 3 && (pos & 3) == 0) + len = 4; + else if (todo > 1 && (pos & 1) == 0) + len = 2; + } + + /* + * Similarly to the read case, we have some trickery here but + * it's different to handle. We need to pass the value to OPAL in + * a register whose layout depends on the access size. We want + * to reproduce the memory layout of the user, however we aren't + * doing a load from user and a store to another memory location + * which would achieve that. Here we pass the value to OPAL via + * a register which is expected to contain the "BE" interpretation + * of the byte sequence. IE: for a 32-bit access, byte 0 should be + * in the MSB. So here we *do* need to byteswap on LE. + * + * User bytes: LE "data" OPAL "data" + * 32-bit: B0 B1 B2 B3 B3B2B1B0 B0B1B2B3 + * 16-bit: B0 B1 0000B1B0 0000B0B1 + * 8-bit: B0 000000B0 000000B0 + */ + switch(len) { + case 4: + rc = __get_user(data, (u32 __user *)ubuf); + data = cpu_to_be32(data); + break; + case 2: + rc = __get_user(data, (u16 __user *)ubuf); + data = cpu_to_be16(data); + break; + default: + rc = __get_user(data, (u8 __user *)ubuf); + break; + } + if (rc) + return -EFAULT; + + rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos, + data, len); + if (rc) + return -ENXIO; + *ppos += len; + ubuf += len; + todo -= len; + } + + return count; +} + +static const struct file_operations lpc_fops = { + .read = lpc_debug_read, + .write = lpc_debug_write, + .open = simple_open, + .llseek = default_llseek, +}; + +static int opal_lpc_debugfs_create_type(struct dentry *folder, + const char *fname, + enum OpalLPCAddressType type) +{ + struct lpc_debugfs_entry *entry; + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + entry->lpc_type = type; + debugfs_create_file(fname, 0600, folder, entry, &lpc_fops); + return 0; +} + +static int opal_lpc_init_debugfs(void) +{ + struct dentry *root; + int rc = 0; + + if (opal_lpc_chip_id < 0) + return -ENODEV; + + root = debugfs_create_dir("lpc", arch_debugfs_dir); + + rc |= opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO); + rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM); + rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW); + return rc; +} +machine_device_initcall(powernv, opal_lpc_init_debugfs); +#endif /* CONFIG_DEBUG_FS */ + +void __init opal_lpc_init(void) +{ + struct device_node *np; + + /* + * Look for a Power8 LPC bus tagged as "primary", + * we currently support only one though the OPAL APIs + * support any number. + */ + for_each_compatible_node(np, NULL, "ibm,power8-lpc") { + if (!of_device_is_available(np)) + continue; + if (!of_get_property(np, "primary", NULL)) + continue; + opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); + break; + } + if (opal_lpc_chip_id < 0) + return; + + /* Does it support direct mapping ? */ + if (of_property_present(np, "ranges")) { + pr_info("OPAL: Found memory mapped LPC bus on chip %d\n", + opal_lpc_chip_id); + isa_bridge_init_non_pci(np); + } else { + pr_info("OPAL: Found non-mapped LPC bus on chip %d\n", + opal_lpc_chip_id); + + /* Setup special IO ops */ + ppc_pci_io = opal_lpc_io; + isa_io_special = true; + } +} diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c new file mode 100644 index 0000000000..a1754a2826 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * OPAL asynchronus Memory error handling support in PowerNV. + * + * Copyright 2013 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG + +#include +#include +#include +#include +#include + +#include +#include +#include + +static int opal_mem_err_nb_init; +static LIST_HEAD(opal_memory_err_list); +static DEFINE_SPINLOCK(opal_mem_err_lock); + +struct OpalMsgNode { + struct list_head list; + struct opal_msg msg; +}; + +static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) +{ + uint64_t paddr_start, paddr_end; + + pr_debug("%s: Retrieved memory error event, type: 0x%x\n", + __func__, merr_evt->type); + switch (merr_evt->type) { + case OPAL_MEM_ERR_TYPE_RESILIENCE: + paddr_start = be64_to_cpu(merr_evt->u.resilience.physical_address_start); + paddr_end = be64_to_cpu(merr_evt->u.resilience.physical_address_end); + break; + case OPAL_MEM_ERR_TYPE_DYN_DALLOC: + paddr_start = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start); + paddr_end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end); + break; + default: + return; + } + + for (; paddr_start < paddr_end; paddr_start += PAGE_SIZE) { + memory_failure(paddr_start >> PAGE_SHIFT, 0); + } +} + +static void handle_memory_error(void) +{ + unsigned long flags; + struct OpalMemoryErrorData *merr_evt; + struct OpalMsgNode *msg_node; + + spin_lock_irqsave(&opal_mem_err_lock, flags); + while (!list_empty(&opal_memory_err_list)) { + msg_node = list_entry(opal_memory_err_list.next, + struct OpalMsgNode, list); + list_del(&msg_node->list); + spin_unlock_irqrestore(&opal_mem_err_lock, flags); + + merr_evt = (struct OpalMemoryErrorData *) + &msg_node->msg.params[0]; + handle_memory_error_event(merr_evt); + kfree(msg_node); + spin_lock_irqsave(&opal_mem_err_lock, flags); + } + spin_unlock_irqrestore(&opal_mem_err_lock, flags); +} + +static void mem_error_handler(struct work_struct *work) +{ + handle_memory_error(); +} + +static DECLARE_WORK(mem_error_work, mem_error_handler); + +/* + * opal_memory_err_event - notifier handler that queues up the opal message + * to be processed later. + */ +static int opal_memory_err_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + unsigned long flags; + struct OpalMsgNode *msg_node; + + if (msg_type != OPAL_MSG_MEM_ERR) + return 0; + + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (!msg_node) { + pr_err("MEMORY_ERROR: out of memory, Opal message event not" + "handled\n"); + return -ENOMEM; + } + memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); + + spin_lock_irqsave(&opal_mem_err_lock, flags); + list_add(&msg_node->list, &opal_memory_err_list); + spin_unlock_irqrestore(&opal_mem_err_lock, flags); + + schedule_work(&mem_error_work); + return 0; +} + +static struct notifier_block opal_mem_err_nb = { + .notifier_call = opal_memory_err_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_mem_err_init(void) +{ + int ret; + + if (!opal_mem_err_nb_init) { + ret = opal_message_notifier_register( + OPAL_MSG_MEM_ERR, &opal_mem_err_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + opal_mem_err_nb_init = 1; + } + return 0; +} +machine_device_initcall(powernv, opal_mem_err_init); diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c new file mode 100644 index 0000000000..22d6efe17b --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL in-memory console interface + * + * Copyright 2014 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include "powernv.h" + +/* OPAL in-memory console. Defined in OPAL source at core/console.c */ +struct memcons { + __be64 magic; +#define MEMCONS_MAGIC 0x6630696567726173L + __be64 obuf_phys; + __be64 ibuf_phys; + __be32 obuf_size; + __be32 ibuf_size; + __be32 out_pos; +#define MEMCONS_OUT_POS_WRAP 0x80000000u +#define MEMCONS_OUT_POS_MASK 0x00ffffffu + __be32 in_prod; + __be32 in_cons; +}; + +static struct memcons *opal_memcons = NULL; + +ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count) +{ + const char *conbuf; + ssize_t ret; + size_t first_read = 0; + uint32_t out_pos, avail; + + if (!mc) + return -ENODEV; + + out_pos = be32_to_cpu(READ_ONCE(mc->out_pos)); + + /* Now we've read out_pos, put a barrier in before reading the new + * data it points to in conbuf. */ + smp_rmb(); + + conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys)); + + /* When the buffer has wrapped, read from the out_pos marker to the end + * of the buffer, and then read the remaining data as in the un-wrapped + * case. */ + if (out_pos & MEMCONS_OUT_POS_WRAP) { + + out_pos &= MEMCONS_OUT_POS_MASK; + avail = be32_to_cpu(mc->obuf_size) - out_pos; + + ret = memory_read_from_buffer(to, count, &pos, + conbuf + out_pos, avail); + + if (ret < 0) + goto out; + + first_read = ret; + to += first_read; + count -= first_read; + pos -= avail; + + if (count <= 0) + goto out; + } + + /* Sanity check. The firmware should not do this to us. */ + if (out_pos > be32_to_cpu(mc->obuf_size)) { + pr_err("OPAL: memory console corruption. Aborting read.\n"); + return -EINVAL; + } + + ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos); + + if (ret < 0) + goto out; + + ret += first_read; +out: + return ret; +} + +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) +{ + return memcons_copy(opal_memcons, to, pos, count); +} + +static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + return opal_msglog_copy(to, pos, count); +} + +static struct bin_attribute opal_msglog_attr = { + .attr = {.name = "msglog", .mode = 0400}, + .read = opal_msglog_read +}; + +struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) +{ + u64 mcaddr; + struct memcons *mc; + + if (of_property_read_u64(node, mc_prop_name, &mcaddr)) { + pr_warn("%s property not found, no message log\n", + mc_prop_name); + goto out_err; + } + + mc = phys_to_virt(mcaddr); + if (!mc) { + pr_warn("memory console address is invalid\n"); + goto out_err; + } + + if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) { + pr_warn("memory console version is invalid\n"); + goto out_err; + } + + return mc; + +out_err: + return NULL; +} + +u32 __init memcons_get_size(struct memcons *mc) +{ + return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size); +} + +void __init opal_msglog_init(void) +{ + opal_memcons = memcons_init(opal_node, "ibm,opal-memcons"); + if (!opal_memcons) { + pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n"); + return; + } + + opal_msglog_attr.size = memcons_get_size(opal_memcons); +} + +void __init opal_msglog_sysfs_init(void) +{ + if (!opal_memcons) { + pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n"); + return; + } + + if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0) + pr_warn("OPAL: sysfs file creation failed\n"); +} diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c new file mode 100644 index 0000000000..380bc2d7eb --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV nvram code. + * + * Copyright 2011 IBM Corp. + */ + +#define DEBUG + +#include +#include +#include +#include + +#include +#include +#include + +static unsigned int nvram_size; + +static ssize_t opal_nvram_size(void) +{ + return nvram_size; +} + +static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) +{ + s64 rc; + int off; + + if (*index >= nvram_size) + return 0; + off = *index; + if ((off + count) > nvram_size) + count = nvram_size - off; + rc = opal_read_nvram(__pa(buf), count, off); + if (rc != OPAL_SUCCESS) + return -EIO; + *index += count; + return count; +} + +/* + * This can be called in the panic path with interrupts off, so use + * mdelay in that case. + */ +static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) +{ + s64 rc = OPAL_BUSY; + int off; + + if (*index >= nvram_size) + return 0; + off = *index; + if ((off + count) > nvram_size) + count = nvram_size - off; + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_write_nvram(__pa(buf), count, off); + if (rc == OPAL_BUSY_EVENT) { + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); + } + } + + if (rc) + return -EIO; + + *index += count; + return count; +} + +static int __init opal_nvram_init_log_partitions(void) +{ + /* Scan nvram for partitions */ + nvram_scan_partitions(); + nvram_init_oops_partition(0); + return 0; +} +machine_arch_initcall(powernv, opal_nvram_init_log_partitions); + +void __init opal_nvram_init(void) +{ + struct device_node *np; + const __be32 *nbytes_p; + + np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram"); + if (np == NULL) + return; + + nbytes_p = of_get_property(np, "#bytes", NULL); + if (!nbytes_p) { + of_node_put(np); + return; + } + nvram_size = be32_to_cpup(nbytes_p); + + pr_info("OPAL nvram setup, %u bytes\n", nvram_size); + of_node_put(np); + + ppc_md.nvram_read = opal_nvram_read; + ppc_md.nvram_write = opal_nvram_write; + ppc_md.nvram_size = opal_nvram_size; +} + diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c new file mode 100644 index 0000000000..db99ffcb7b --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-power.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL power control for graceful shutdown handling + * + * Copyright 2015 IBM Corp. + */ + +#define pr_fmt(fmt) "opal-power: " fmt + +#include +#include +#include +#include + +#include +#include + +#define SOFT_OFF 0x00 +#define SOFT_REBOOT 0x01 + +/* Detect EPOW event */ +static bool detect_epow(void) +{ + u16 epow; + int i, rc; + __be16 epow_classes; + __be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0}; + + /* + * Check for EPOW event. Kernel sends supported EPOW classes info + * to OPAL. OPAL returns EPOW info along with classes present. + */ + epow_classes = cpu_to_be16(OPAL_SYSEPOW_MAX); + rc = opal_get_epow_status(opal_epow_status, &epow_classes); + if (rc != OPAL_SUCCESS) { + pr_err("Failed to get EPOW event information\n"); + return false; + } + + /* Look for EPOW events present */ + for (i = 0; i < be16_to_cpu(epow_classes); i++) { + epow = be16_to_cpu(opal_epow_status[i]); + + /* Filter events which do not need shutdown. */ + if (i == OPAL_SYSEPOW_POWER) + epow &= ~(OPAL_SYSPOWER_CHNG | OPAL_SYSPOWER_FAIL | + OPAL_SYSPOWER_INCL); + if (epow) + return true; + } + + return false; +} + +/* Check for existing EPOW, DPO events */ +static bool __init poweroff_pending(void) +{ + int rc; + __be64 opal_dpo_timeout; + + /* Check for DPO event */ + rc = opal_get_dpo_status(&opal_dpo_timeout); + if (rc == OPAL_SUCCESS) { + pr_info("Existing DPO event detected.\n"); + return true; + } + + /* Check for EPOW event */ + if (detect_epow()) { + pr_info("Existing EPOW event detected.\n"); + return true; + } + + return false; +} + +/* OPAL power-control events notifier */ +static int opal_power_control_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + uint64_t type; + + switch (msg_type) { + case OPAL_MSG_EPOW: + if (detect_epow()) { + pr_info("EPOW msg received. Powering off system\n"); + orderly_poweroff(true); + } + break; + case OPAL_MSG_DPO: + pr_info("DPO msg received. Powering off system\n"); + orderly_poweroff(true); + break; + case OPAL_MSG_SHUTDOWN: + type = be64_to_cpu(((struct opal_msg *)msg)->params[0]); + switch (type) { + case SOFT_REBOOT: + pr_info("Reboot requested\n"); + orderly_reboot(); + break; + case SOFT_OFF: + pr_info("Poweroff requested\n"); + orderly_poweroff(true); + break; + default: + pr_err("Unknown power-control type %llu\n", type); + } + break; + default: + pr_err("Unknown OPAL message type %lu\n", msg_type); + } + + return 0; +} + +/* OPAL EPOW event notifier block */ +static struct notifier_block opal_epow_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +/* OPAL DPO event notifier block */ +static struct notifier_block opal_dpo_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +/* OPAL power-control event notifier block */ +static struct notifier_block opal_power_control_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +int __init opal_power_control_init(void) +{ + int ret, supported = 0; + struct device_node *np; + + /* Register OPAL power-control events notifier */ + ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN, + &opal_power_control_nb); + if (ret) + pr_err("Failed to register SHUTDOWN notifier, ret = %d\n", ret); + + /* Determine OPAL EPOW, DPO support */ + np = of_find_node_by_path("/ibm,opal/epow"); + if (np) { + supported = of_device_is_compatible(np, "ibm,opal-v3-epow"); + of_node_put(np); + } + + if (!supported) + return 0; + pr_info("OPAL EPOW, DPO support detected.\n"); + + /* Register EPOW event notifier */ + ret = opal_message_notifier_register(OPAL_MSG_EPOW, &opal_epow_nb); + if (ret) + pr_err("Failed to register EPOW notifier, ret = %d\n", ret); + + /* Register DPO event notifier */ + ret = opal_message_notifier_register(OPAL_MSG_DPO, &opal_dpo_nb); + if (ret) + pr_err("Failed to register DPO notifier, ret = %d\n", ret); + + /* Check for any pending EPOW or DPO events. */ + if (poweroff_pending()) + orderly_poweroff(true); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c new file mode 100644 index 0000000000..ea917266aa --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL Powercap interface + * + * Copyright 2017 IBM Corp. + */ + +#define pr_fmt(fmt) "opal-powercap: " fmt + +#include +#include +#include + +#include + +static DEFINE_MUTEX(powercap_mutex); + +static struct kobject *powercap_kobj; + +struct powercap_attr { + u32 handle; + struct kobj_attribute attr; +}; + +static struct pcap { + struct attribute_group pg; + struct powercap_attr *pattrs; +} *pcaps; + +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct powercap_attr *pcap_attr = container_of(attr, + struct powercap_attr, attr); + struct opal_msg msg; + u32 pcap; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&powercap_mutex); + if (ret) + goto out_token; + + ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap)); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) { + ret = sprintf(buf, "%u\n", be32_to_cpu(pcap)); + if (ret < 0) + ret = -EIO; + } + break; + case OPAL_SUCCESS: + ret = sprintf(buf, "%u\n", be32_to_cpu(pcap)); + if (ret < 0) + ret = -EIO; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&powercap_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static ssize_t powercap_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + struct powercap_attr *pcap_attr = container_of(attr, + struct powercap_attr, attr); + struct opal_msg msg; + u32 pcap; + int ret, token; + + ret = kstrtoint(buf, 0, &pcap); + if (ret) + return ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&powercap_mutex); + if (ret) + goto out_token; + + ret = opal_set_powercap(pcap_attr->handle, token, pcap); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&powercap_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static void __init powercap_add_attr(int handle, const char *name, + struct powercap_attr *attr) +{ + attr->handle = handle; + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = powercap_show; +} + +void __init opal_powercap_init(void) +{ + struct device_node *powercap, *node; + int i = 0; + + powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap"); + if (!powercap) { + pr_devel("Powercap node not found\n"); + return; + } + + pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps), + GFP_KERNEL); + if (!pcaps) + goto out_put_powercap; + + powercap_kobj = kobject_create_and_add("powercap", opal_kobj); + if (!powercap_kobj) { + pr_warn("Failed to create powercap kobject\n"); + goto out_pcaps; + } + + i = 0; + for_each_child_of_node(powercap, node) { + u32 cur, min, max; + int j = 0; + bool has_cur = false, has_min = false, has_max = false; + + if (!of_property_read_u32(node, "powercap-min", &min)) { + j++; + has_min = true; + } + + if (!of_property_read_u32(node, "powercap-max", &max)) { + j++; + has_max = true; + } + + if (!of_property_read_u32(node, "powercap-current", &cur)) { + j++; + has_cur = true; + } + + pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr), + GFP_KERNEL); + if (!pcaps[i].pattrs) + goto out_pcaps_pattrs; + + pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *), + GFP_KERNEL); + if (!pcaps[i].pg.attrs) { + kfree(pcaps[i].pattrs); + goto out_pcaps_pattrs; + } + + j = 0; + pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node); + if (!pcaps[i].pg.name) { + kfree(pcaps[i].pattrs); + kfree(pcaps[i].pg.attrs); + goto out_pcaps_pattrs; + } + + if (has_min) { + powercap_add_attr(min, "powercap-min", + &pcaps[i].pattrs[j]); + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (has_max) { + powercap_add_attr(max, "powercap-max", + &pcaps[i].pattrs[j]); + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (has_cur) { + powercap_add_attr(cur, "powercap-current", + &pcaps[i].pattrs[j]); + pcaps[i].pattrs[j].attr.attr.mode |= 0220; + pcaps[i].pattrs[j].attr.store = powercap_store; + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) { + pr_warn("Failed to create powercap attribute group %s\n", + pcaps[i].pg.name); + goto out_pcaps_pattrs; + } + i++; + } + of_node_put(powercap); + + return; + +out_pcaps_pattrs: + while (--i >= 0) { + kfree(pcaps[i].pattrs); + kfree(pcaps[i].pg.attrs); + kfree(pcaps[i].pg.name); + } + kobject_put(powercap_kobj); + of_node_put(node); +out_pcaps: + kfree(pcaps); +out_put_powercap: + of_node_put(powercap); +} diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c new file mode 100644 index 0000000000..327e2f7690 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * OPAL Runtime Diagnostics interface driver + * Supported on POWERNV platform + * + * Copyright IBM Corporation 2015 + */ + +#define pr_fmt(fmt) "opal-prd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +struct opal_prd_msg { + union { + struct opal_prd_msg_header header; + DECLARE_FLEX_ARRAY(u8, data); + }; +}; + +/* + * The msg member must be at the end of the struct, as it's followed by the + * message data. + */ +struct opal_prd_msg_queue_item { + struct list_head list; + struct opal_prd_msg msg; +}; + +static struct device_node *prd_node; +static LIST_HEAD(opal_prd_msg_queue); +static DEFINE_SPINLOCK(opal_prd_msg_queue_lock); +static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait); +static atomic_t prd_usage; + +static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size) +{ + struct device_node *parent, *node; + bool found; + + if (addr + size < addr) + return false; + + parent = of_find_node_by_path("/reserved-memory"); + if (!parent) + return false; + + found = false; + + for_each_child_of_node(parent, node) { + uint64_t range_addr, range_size, range_end; + const __be32 *addrp; + const char *label; + + addrp = of_get_address(node, 0, &range_size, NULL); + + range_addr = of_read_number(addrp, 2); + range_end = range_addr + range_size; + + label = of_get_property(node, "ibm,prd-label", NULL); + + /* PRD ranges need a label */ + if (!label) + continue; + + if (range_end <= range_addr) + continue; + + if (addr >= range_addr && addr + size <= range_end) { + found = true; + of_node_put(node); + break; + } + } + + of_node_put(parent); + return found; +} + +static int opal_prd_open(struct inode *inode, struct file *file) +{ + /* + * Prevent multiple (separate) processes from concurrent interactions + * with the FW PRD channel + */ + if (atomic_xchg(&prd_usage, 1) == 1) + return -EBUSY; + + return 0; +} + +/* + * opal_prd_mmap - maps firmware-provided ranges into userspace + * @file: file structure for the device + * @vma: VMA to map the registers into + */ + +static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t addr, size; + pgprot_t page_prot; + + pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_flags); + + addr = vma->vm_pgoff << PAGE_SHIFT; + size = vma->vm_end - vma->vm_start; + + /* ensure we're mapping within one of the allowable ranges */ + if (!opal_prd_range_is_valid(addr, size)) + return -EINVAL; + + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, + page_prot); +} + +static bool opal_msg_queue_empty(void) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + ret = list_empty(&opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + return ret; +} + +static __poll_t opal_prd_poll(struct file *file, + struct poll_table_struct *wait) +{ + poll_wait(file, &opal_prd_msg_wait, wait); + + if (!opal_msg_queue_empty()) + return EPOLLIN | EPOLLRDNORM; + + return 0; +} + +static ssize_t opal_prd_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_queue_item *item; + unsigned long flags; + ssize_t size, err; + int rc; + + /* we need at least a header's worth of data */ + if (count < sizeof(item->msg.header)) + return -EINVAL; + + if (*ppos) + return -ESPIPE; + + item = NULL; + + for (;;) { + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + if (!list_empty(&opal_prd_msg_queue)) { + item = list_first_entry(&opal_prd_msg_queue, + struct opal_prd_msg_queue_item, list); + list_del(&item->list); + } + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + if (item) + break; + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(opal_prd_msg_wait, + !opal_msg_queue_empty()); + if (rc) + return -EINTR; + } + + size = be16_to_cpu(item->msg.header.size); + if (size > count) { + err = -EINVAL; + goto err_requeue; + } + + rc = copy_to_user(buf, &item->msg, size); + if (rc) { + err = -EFAULT; + goto err_requeue; + } + + kfree(item); + + return size; + +err_requeue: + /* eep! re-queue at the head of the list */ + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + return err; +} + +static ssize_t opal_prd_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct opal_prd_msg_header hdr; + struct opal_prd_msg *msg; + ssize_t size; + int rc; + + size = sizeof(hdr); + + if (count < size) + return -EINVAL; + + /* grab the header */ + rc = copy_from_user(&hdr, buf, sizeof(hdr)); + if (rc) + return -EFAULT; + + size = be16_to_cpu(hdr.size); + + msg = memdup_user(buf, size); + if (IS_ERR(msg)) + return PTR_ERR(msg); + + rc = opal_prd_msg(msg); + if (rc) { + pr_warn("write: opal_prd_msg returned %d\n", rc); + size = -EIO; + } + + kfree(msg); + + return size; +} + +static int opal_prd_release(struct inode *inode, struct file *file) +{ + struct opal_prd_msg msg; + + msg.header.size = cpu_to_be16(sizeof(msg)); + msg.header.type = OPAL_PRD_MSG_TYPE_FINI; + + opal_prd_msg(&msg); + + atomic_xchg(&prd_usage, 0); + + return 0; +} + +static long opal_prd_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct opal_prd_info info; + struct opal_prd_scom scom; + int rc = 0; + + switch (cmd) { + case OPAL_PRD_GET_INFO: + memset(&info, 0, sizeof(info)); + info.version = OPAL_PRD_KERNEL_VERSION; + rc = copy_to_user((void __user *)param, &info, sizeof(info)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_READ: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_read(scom.chip, scom.addr, + (__be64 *)&scom.data); + scom.data = be64_to_cpu(scom.data); + pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + case OPAL_PRD_SCOM_WRITE: + rc = copy_from_user(&scom, (void __user *)param, sizeof(scom)); + if (rc) + return -EFAULT; + + scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data); + pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n", + scom.chip, scom.addr, scom.data, scom.rc); + + rc = copy_to_user((void __user *)param, &scom, sizeof(scom)); + if (rc) + return -EFAULT; + break; + + default: + rc = -EINVAL; + } + + return rc; +} + +static const struct file_operations opal_prd_fops = { + .open = opal_prd_open, + .mmap = opal_prd_mmap, + .poll = opal_prd_poll, + .read = opal_prd_read, + .write = opal_prd_write, + .unlocked_ioctl = opal_prd_ioctl, + .release = opal_prd_release, + .owner = THIS_MODULE, +}; + +static struct miscdevice opal_prd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "opal-prd", + .fops = &opal_prd_fops, +}; + +/* opal interface */ +static int opal_prd_msg_notifier(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + struct opal_prd_msg_queue_item *item; + struct opal_prd_msg_header *hdr; + struct opal_msg *msg = _msg; + int msg_size, item_size; + unsigned long flags; + + if (msg_type != OPAL_MSG_PRD && msg_type != OPAL_MSG_PRD2) + return 0; + + /* Calculate total size of the message and item we need to store. The + * 'size' field in the header includes the header itself. */ + hdr = (void *)msg->params; + msg_size = be16_to_cpu(hdr->size); + item_size = msg_size + sizeof(*item) - sizeof(item->msg); + + item = kzalloc(item_size, GFP_ATOMIC); + if (!item) + return -ENOMEM; + + memcpy(&item->msg.data, msg->params, msg_size); + + spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); + list_add_tail(&item->list, &opal_prd_msg_queue); + spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags); + + wake_up_interruptible(&opal_prd_msg_wait); + + return 0; +} + +static struct notifier_block opal_prd_event_nb = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + +static struct notifier_block opal_prd_event_nb2 = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + +static int opal_prd_probe(struct platform_device *pdev) +{ + int rc; + + if (!pdev || !pdev->dev.of_node) + return -ENODEV; + + /* We should only have one prd driver instance per machine; ensure + * that we only get a valid probe on a single OF node. + */ + if (prd_node) + return -EBUSY; + + prd_node = pdev->dev.of_node; + + rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb); + if (rc) { + pr_err("Couldn't register event notifier\n"); + return rc; + } + + rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2); + if (rc) { + pr_err("Couldn't register PRD2 event notifier\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + return rc; + } + + rc = misc_register(&opal_prd_dev); + if (rc) { + pr_err("failed to register miscdev\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, + &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, + &opal_prd_event_nb2); + return rc; + } + + return 0; +} + +static int opal_prd_remove(struct platform_device *pdev) +{ + misc_deregister(&opal_prd_dev); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2); + return 0; +} + +static const struct of_device_id opal_prd_match[] = { + { .compatible = "ibm,opal-prd" }, + { }, +}; + +static struct platform_driver opal_prd_driver = { + .driver = { + .name = "opal-prd", + .of_match_table = opal_prd_match, + }, + .probe = opal_prd_probe, + .remove = opal_prd_remove, +}; + +module_platform_driver(opal_prd_driver); + +MODULE_DEVICE_TABLE(of, opal_prd_match); +MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c new file mode 100644 index 0000000000..6441e17b69 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL Power-Shift-Ratio interface + * + * Copyright 2017 IBM Corp. + */ + +#define pr_fmt(fmt) "opal-psr: " fmt + +#include +#include +#include + +#include + +static DEFINE_MUTEX(psr_mutex); + +static struct kobject *psr_kobj; + +static struct psr_attr { + u32 handle; + struct kobj_attribute attr; +} *psr_attrs; + +static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); + struct opal_msg msg; + int psr, ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&psr_mutex); + if (ret) + goto out_token; + + ret = opal_get_power_shift_ratio(psr_attr->handle, token, + (u32 *)__pa(&psr)); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) { + ret = sprintf(buf, "%u\n", be32_to_cpu(psr)); + if (ret < 0) + ret = -EIO; + } + break; + case OPAL_SUCCESS: + ret = sprintf(buf, "%u\n", be32_to_cpu(psr)); + if (ret < 0) + ret = -EIO; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&psr_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); + struct opal_msg msg; + int psr, ret, token; + + ret = kstrtoint(buf, 0, &psr); + if (ret) + return ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&psr_mutex); + if (ret) + goto out_token; + + ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&psr_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +void __init opal_psr_init(void) +{ + struct device_node *psr, *node; + int i = 0; + + psr = of_find_compatible_node(NULL, NULL, + "ibm,opal-power-shift-ratio"); + if (!psr) { + pr_devel("Power-shift-ratio node not found\n"); + return; + } + + psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs), + GFP_KERNEL); + if (!psr_attrs) + goto out_put_psr; + + psr_kobj = kobject_create_and_add("psr", opal_kobj); + if (!psr_kobj) { + pr_warn("Failed to create psr kobject\n"); + goto out; + } + + for_each_child_of_node(psr, node) { + if (of_property_read_u32(node, "handle", + &psr_attrs[i].handle)) + goto out_kobj; + + sysfs_attr_init(&psr_attrs[i].attr.attr); + if (of_property_read_string(node, "label", + &psr_attrs[i].attr.attr.name)) + goto out_kobj; + psr_attrs[i].attr.attr.mode = 0664; + psr_attrs[i].attr.show = psr_show; + psr_attrs[i].attr.store = psr_store; + if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) { + pr_devel("Failed to create psr sysfs file %s\n", + psr_attrs[i].attr.attr.name); + goto out_kobj; + } + i++; + } + of_node_put(psr); + + return; +out_kobj: + of_node_put(node); + kobject_put(psr_kobj); +out: + kfree(psr_attrs); +out_put_psr: + of_node_put(psr); +} diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c new file mode 100644 index 0000000000..79011a263a --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV Real Time Clock. + * + * Copyright 2011 IBM Corp. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) +{ + tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + + bcd2bin((y_m_d >> 16) & 0xff)) - 1900; + tm->tm_mon = bcd2bin((y_m_d >> 8) & 0xff) - 1; + tm->tm_mday = bcd2bin(y_m_d & 0xff); + tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff); + tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); + tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); + tm->tm_wday = -1; +} + +time64_t __init opal_get_boot_time(void) +{ + struct rtc_time tm; + u32 y_m_d; + u64 h_m_s_ms; + __be32 __y_m_d; + __be64 __h_m_s_ms; + long rc = OPAL_BUSY; + + if (!opal_check_token(OPAL_RTC_READ)) + return 0; + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) { + mdelay(OPAL_BUSY_DELAY_MS); + opal_poll_events(NULL); + } else if (rc == OPAL_BUSY) { + mdelay(OPAL_BUSY_DELAY_MS); + } + } + if (rc != OPAL_SUCCESS) + return 0; + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = be64_to_cpu(__h_m_s_ms); + opal_to_tm(y_m_d, h_m_s_ms, &tm); + return rtc_tm_to_time64(&tm); +} + +static __init int opal_time_init(void) +{ + struct platform_device *pdev; + struct device_node *rtc; + + rtc = of_find_node_by_path("/ibm,opal/rtc"); + if (rtc) { + pdev = of_platform_device_create(rtc, "opal-rtc", NULL); + of_node_put(rtc); + } else { + if (opal_check_token(OPAL_RTC_READ) || + opal_check_token(OPAL_READ_TPO)) + pdev = platform_device_register_simple("opal-rtc", -1, + NULL, 0); + else + return -ENODEV; + } + + return PTR_ERR_OR_ZERO(pdev); +} +machine_subsys_initcall(powernv, opal_time_init); diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c new file mode 100644 index 0000000000..6ac410f4d3 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PowerNV code for secure variables + * + * Copyright (C) 2019 IBM Corporation + * Author: Claudio Carvalho + * Nayna Jain + * + * APIs to access secure variables managed by OPAL. + */ + +#define pr_fmt(fmt) "secvar: "fmt + +#include +#include +#include +#include +#include +#include + +static int opal_status_to_err(int rc) +{ + int err; + + switch (rc) { + case OPAL_SUCCESS: + err = 0; + break; + case OPAL_UNSUPPORTED: + err = -ENXIO; + break; + case OPAL_PARAMETER: + err = -EINVAL; + break; + case OPAL_RESOURCE: + err = -ENOSPC; + break; + case OPAL_HARDWARE: + err = -EIO; + break; + case OPAL_NO_MEM: + err = -ENOMEM; + break; + case OPAL_EMPTY: + err = -ENOENT; + break; + case OPAL_PARTIAL: + err = -EFBIG; + break; + default: + err = -EINVAL; + } + + return err; +} + +static int opal_get_variable(const char *key, u64 ksize, u8 *data, u64 *dsize) +{ + int rc; + + if (!key || !dsize) + return -EINVAL; + + *dsize = cpu_to_be64(*dsize); + + rc = opal_secvar_get(key, ksize, data, dsize); + + *dsize = be64_to_cpu(*dsize); + + return opal_status_to_err(rc); +} + +static int opal_get_next_variable(const char *key, u64 *keylen, u64 keybufsize) +{ + int rc; + + if (!key || !keylen) + return -EINVAL; + + *keylen = cpu_to_be64(*keylen); + + rc = opal_secvar_get_next(key, keylen, keybufsize); + + *keylen = be64_to_cpu(*keylen); + + return opal_status_to_err(rc); +} + +static int opal_set_variable(const char *key, u64 ksize, u8 *data, u64 dsize) +{ + int rc; + + if (!key || !data) + return -EINVAL; + + rc = opal_secvar_enqueue_update(key, ksize, data, dsize); + + return opal_status_to_err(rc); +} + +static ssize_t opal_secvar_format(char *buf, size_t bufsize) +{ + ssize_t rc = 0; + struct device_node *node; + const char *format; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_string(node, "format", &format); + if (rc) + goto out; + + rc = snprintf(buf, bufsize, "%s", format); + +out: + of_node_put(node); + + return rc; +} + +static int opal_secvar_max_size(u64 *max_size) +{ + int rc; + struct device_node *node; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!node) + return -ENODEV; + + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_u64(node, "max-var-size", max_size); + +out: + of_node_put(node); + return rc; +} + +static const struct secvar_operations opal_secvar_ops = { + .get = opal_get_variable, + .get_next = opal_get_next_variable, + .set = opal_set_variable, + .format = opal_secvar_format, + .max_size = opal_secvar_max_size, +}; + +static int opal_secvar_probe(struct platform_device *pdev) +{ + if (!opal_check_token(OPAL_SECVAR_GET) + || !opal_check_token(OPAL_SECVAR_GET_NEXT) + || !opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE)) { + pr_err("OPAL doesn't support secure variables\n"); + return -ENODEV; + } + + return set_secvar_ops(&opal_secvar_ops); +} + +static const struct of_device_id opal_secvar_match[] = { + { .compatible = "ibm,secvar-backend",}, + {}, +}; + +static struct platform_driver opal_secvar_driver = { + .driver = { + .name = "secvar", + .of_match_table = opal_secvar_match, + }, +}; + +static int __init opal_secvar_init(void) +{ + return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe); +} +device_initcall(opal_secvar_init); diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c new file mode 100644 index 0000000000..9944376b11 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL Sensor-groups interface + * + * Copyright 2017 IBM Corp. + */ + +#define pr_fmt(fmt) "opal-sensor-groups: " fmt + +#include +#include +#include + +#include + +static DEFINE_MUTEX(sg_mutex); + +static struct kobject *sg_kobj; + +struct sg_attr { + u32 handle; + struct kobj_attribute attr; +}; + +static struct sensor_group { + char name[20]; + struct attribute_group sg; + struct sg_attr *sgattrs; +} *sgs; + +int sensor_group_enable(u32 handle, bool enable) +{ + struct opal_msg msg; + int token, ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) + return token; + + ret = opal_sensor_group_enable(handle, token, enable); + if (ret == OPAL_ASYNC_COMPLETION) { + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + } else { + ret = opal_error_code(ret); + } + +out: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(sensor_group_enable); + +static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct sg_attr *sattr = container_of(attr, struct sg_attr, attr); + struct opal_msg msg; + u32 data; + int ret, token; + + ret = kstrtoint(buf, 0, &data); + if (ret) + return ret; + + if (data != 1) + return -EINVAL; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&sg_mutex); + if (ret) + goto out_token; + + ret = opal_sensor_group_clear(sattr->handle, token); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&sg_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static struct sg_ops_info { + int opal_no; + const char *attr_name; + ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count); +} ops_info[] = { + { OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store }, +}; + +static void add_attr(int handle, struct sg_attr *attr, int index) +{ + attr->handle = handle; + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = ops_info[index].attr_name; + attr->attr.attr.mode = 0220; + attr->attr.store = ops_info[index].store; +} + +static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, + u32 handle) +{ + int i, j; + int count = 0; + + for (i = 0; i < len; i++) + for (j = 0; j < ARRAY_SIZE(ops_info); j++) + if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) { + add_attr(handle, &sg->sgattrs[count], j); + sg->sg.attrs[count] = + &sg->sgattrs[count].attr.attr; + count++; + } + + return sysfs_create_group(sg_kobj, &sg->sg); +} + +static int __init get_nr_attrs(const __be32 *ops, int len) +{ + int i, j; + int nr_attrs = 0; + + for (i = 0; i < len; i++) + for (j = 0; j < ARRAY_SIZE(ops_info); j++) + if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) + nr_attrs++; + + return nr_attrs; +} + +void __init opal_sensor_groups_init(void) +{ + struct device_node *sg, *node; + int i = 0; + + sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group"); + if (!sg) { + pr_devel("Sensor groups node not found\n"); + return; + } + + sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL); + if (!sgs) + goto out_sg_put; + + sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj); + if (!sg_kobj) { + pr_warn("Failed to create sensor group kobject\n"); + goto out_sgs; + } + + for_each_child_of_node(sg, node) { + const __be32 *ops; + u32 sgid, len, nr_attrs, chipid; + + ops = of_get_property(node, "ops", &len); + if (!ops) + continue; + + nr_attrs = get_nr_attrs(ops, len); + if (!nr_attrs) + continue; + + sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs), + GFP_KERNEL); + if (!sgs[i].sgattrs) + goto out_sgs_sgattrs; + + sgs[i].sg.attrs = kcalloc(nr_attrs + 1, + sizeof(*sgs[i].sg.attrs), + GFP_KERNEL); + + if (!sgs[i].sg.attrs) { + kfree(sgs[i].sgattrs); + goto out_sgs_sgattrs; + } + + if (of_property_read_u32(node, "sensor-group-id", &sgid)) { + pr_warn("sensor-group-id property not found\n"); + goto out_sgs_sgattrs; + } + + if (!of_property_read_u32(node, "ibm,chip-id", &chipid)) + sprintf(sgs[i].name, "%pOFn%d", node, chipid); + else + sprintf(sgs[i].name, "%pOFn", node); + + sgs[i].sg.name = sgs[i].name; + if (add_attr_group(ops, len, &sgs[i], sgid)) { + pr_warn("Failed to create sensor attribute group %s\n", + sgs[i].sg.name); + goto out_sgs_sgattrs; + } + i++; + } + of_node_put(sg); + + return; + +out_sgs_sgattrs: + while (--i >= 0) { + kfree(sgs[i].sgattrs); + kfree(sgs[i].sg.attrs); + } + kobject_put(sg_kobj); + of_node_put(node); +out_sgs: + kfree(sgs); +out_sg_put: + of_node_put(sg); +} diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c new file mode 100644 index 0000000000..8880a1c145 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV sensor code + * + * Copyright (C) 2013 IBM + */ + +#include +#include +#include +#include +#include +#include + +/* + * This will return sensor information to driver based on the requested sensor + * handle. A handle is an opaque id for the powernv, read by the driver from the + * device tree.. + */ +int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) +{ + int ret, token; + struct opal_msg msg; + __be32 data; + + token = opal_async_get_token_interruptible(); + if (token < 0) + return token; + + ret = opal_sensor_read(sensor_hndl, token, &data); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out; + } + + ret = opal_error_code(opal_get_async_rc(msg)); + *sensor_data = be32_to_cpu(data); + break; + + case OPAL_SUCCESS: + ret = 0; + *sensor_data = be32_to_cpu(data); + break; + + case OPAL_WRONG_STATE: + ret = -EIO; + break; + + default: + ret = opal_error_code(ret); + break; + } + +out: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(opal_get_sensor_data); + +int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data) +{ + int ret, token; + struct opal_msg msg; + __be64 data; + + if (!opal_check_token(OPAL_SENSOR_READ_U64)) { + u32 sdata; + + ret = opal_get_sensor_data(sensor_hndl, &sdata); + if (!ret) + *sensor_data = sdata; + return ret; + } + + token = opal_async_get_token_interruptible(); + if (token < 0) + return token; + + ret = opal_sensor_read_u64(sensor_hndl, token, &data); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = opal_error_code(opal_get_async_rc(msg)); + *sensor_data = be64_to_cpu(data); + break; + + case OPAL_SUCCESS: + ret = 0; + *sensor_data = be64_to_cpu(data); + break; + + case OPAL_WRONG_STATE: + ret = -EIO; + break; + + default: + ret = opal_error_code(ret); + break; + } + +out_token: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(opal_get_sensor_data_u64); + +int __init opal_sensor_init(void) +{ + struct platform_device *pdev; + struct device_node *sensor; + + sensor = of_find_node_by_path("/ibm,opal/sensors"); + if (!sensor) { + pr_err("Opal node 'sensors' not found\n"); + return -ENODEV; + } + + pdev = of_platform_device_create(sensor, "opal-sensor", NULL); + of_node_put(sensor); + + return PTR_ERR_OR_ZERO(pdev); +} diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c new file mode 100644 index 0000000000..a12312afe4 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV system parameter code + * + * Copyright (C) 2013 IBM + */ + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_PARAM_DATA_LEN 64 + +static DEFINE_MUTEX(opal_sysparam_mutex); +static struct kobject *sysparam_kobj; +static void *param_data_buf; + +struct param_attr { + struct list_head list; + u32 param_id; + u32 param_size; + struct kobj_attribute kobj_attr; +}; + +static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) +{ + struct opal_msg msg; + ssize_t ret; + int token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("%s: Couldn't get the token, returning\n", + __func__); + ret = token; + goto out; + } + + ret = opal_get_param(token, param_id, (u64)buffer, length); + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); + goto out_token; + } + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %zd\n", + __func__, ret); + goto out_token; + } + + ret = opal_error_code(opal_get_async_rc(msg)); + +out_token: + opal_async_release_token(token); +out: + return ret; +} + +static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) +{ + struct opal_msg msg; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("%s: Couldn't get the token, returning\n", + __func__); + ret = token; + goto out; + } + + ret = opal_set_param(token, param_id, (u64)buffer, length); + + if (ret != OPAL_ASYNC_COMPLETION) { + ret = opal_error_code(ret); + goto out_token; + } + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = opal_error_code(opal_get_async_rc(msg)); + +out_token: + opal_async_release_token(token); +out: + return ret; +} + +static ssize_t sys_param_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, char *buf) +{ + struct param_attr *attr = container_of(kobj_attr, struct param_attr, + kobj_attr); + ssize_t ret; + + mutex_lock(&opal_sysparam_mutex); + ret = opal_get_sys_param(attr->param_id, attr->param_size, + param_data_buf); + if (ret) + goto out; + + memcpy(buf, param_data_buf, attr->param_size); + + ret = attr->param_size; +out: + mutex_unlock(&opal_sysparam_mutex); + return ret; +} + +static ssize_t sys_param_store(struct kobject *kobj, + struct kobj_attribute *kobj_attr, const char *buf, size_t count) +{ + struct param_attr *attr = container_of(kobj_attr, struct param_attr, + kobj_attr); + ssize_t ret; + + /* MAX_PARAM_DATA_LEN is sizeof(param_data_buf) */ + if (count > MAX_PARAM_DATA_LEN) + count = MAX_PARAM_DATA_LEN; + + mutex_lock(&opal_sysparam_mutex); + memcpy(param_data_buf, buf, count); + ret = opal_set_sys_param(attr->param_id, attr->param_size, + param_data_buf); + mutex_unlock(&opal_sysparam_mutex); + if (!ret) + ret = count; + return ret; +} + +void __init opal_sys_param_init(void) +{ + struct device_node *sysparam; + struct param_attr *attr; + u32 *id, *size; + int count, i; + u8 *perm; + + if (!opal_kobj) { + pr_warn("SYSPARAM: opal kobject is not available\n"); + goto out; + } + + /* Some systems do not use sysparams; this is not an error */ + sysparam = of_find_node_by_path("/ibm,opal/sysparams"); + if (!sysparam) + goto out; + + if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { + pr_err("SYSPARAM: Opal sysparam node not compatible\n"); + goto out_node_put; + } + + sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj); + if (!sysparam_kobj) { + pr_err("SYSPARAM: Failed to create sysparam kobject\n"); + goto out_node_put; + } + + /* Allocate big enough buffer for any get/set transactions */ + param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL); + if (!param_data_buf) { + pr_err("SYSPARAM: Failed to allocate memory for param data " + "buf\n"); + goto out_kobj_put; + } + + /* Number of parameters exposed through DT */ + count = of_property_count_strings(sysparam, "param-name"); + if (count < 0) { + pr_err("SYSPARAM: No string found of property param-name in " + "the node %pOFn\n", sysparam); + goto out_param_buf; + } + + id = kcalloc(count, sizeof(*id), GFP_KERNEL); + if (!id) { + pr_err("SYSPARAM: Failed to allocate memory to read parameter " + "id\n"); + goto out_param_buf; + } + + size = kcalloc(count, sizeof(*size), GFP_KERNEL); + if (!size) { + pr_err("SYSPARAM: Failed to allocate memory to read parameter " + "size\n"); + goto out_free_id; + } + + perm = kcalloc(count, sizeof(*perm), GFP_KERNEL); + if (!perm) { + pr_err("SYSPARAM: Failed to allocate memory to read supported " + "action on the parameter"); + goto out_free_size; + } + + if (of_property_read_u32_array(sysparam, "param-id", id, count)) { + pr_err("SYSPARAM: Missing property param-id in the DT\n"); + goto out_free_perm; + } + + if (of_property_read_u32_array(sysparam, "param-len", size, count)) { + pr_err("SYSPARAM: Missing property param-len in the DT\n"); + goto out_free_perm; + } + + + if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) { + pr_err("SYSPARAM: Missing property param-perm in the DT\n"); + goto out_free_perm; + } + + attr = kcalloc(count, sizeof(*attr), GFP_KERNEL); + if (!attr) { + pr_err("SYSPARAM: Failed to allocate memory for parameter " + "attributes\n"); + goto out_free_perm; + } + + /* For each of the parameters, populate the parameter attributes */ + for (i = 0; i < count; i++) { + if (size[i] > MAX_PARAM_DATA_LEN) { + pr_warn("SYSPARAM: Not creating parameter %d as size " + "exceeds buffer length\n", i); + continue; + } + + sysfs_attr_init(&attr[i].kobj_attr.attr); + attr[i].param_id = id[i]; + attr[i].param_size = size[i]; + if (of_property_read_string_index(sysparam, "param-name", i, + &attr[i].kobj_attr.attr.name)) + continue; + + /* If the parameter is read-only or read-write */ + switch (perm[i] & 3) { + case OPAL_SYSPARAM_READ: + attr[i].kobj_attr.attr.mode = 0444; + break; + case OPAL_SYSPARAM_WRITE: + attr[i].kobj_attr.attr.mode = 0200; + break; + case OPAL_SYSPARAM_RW: + attr[i].kobj_attr.attr.mode = 0644; + break; + default: + break; + } + + attr[i].kobj_attr.show = sys_param_show; + attr[i].kobj_attr.store = sys_param_store; + + if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) { + pr_err("SYSPARAM: Failed to create sysfs file %s\n", + attr[i].kobj_attr.attr.name); + goto out_free_attr; + } + } + + kfree(perm); + kfree(size); + kfree(id); + of_node_put(sysparam); + return; + +out_free_attr: + kfree(attr); +out_free_perm: + kfree(perm); +out_free_size: + kfree(size); +out_free_id: + kfree(id); +out_param_buf: + kfree(param_data_buf); +out_kobj_put: + kobject_put(sysparam_kobj); +out_node_put: + of_node_put(sysparam); +out: + return; +} diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c new file mode 100644 index 0000000000..91b36541b9 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#ifdef CONFIG_JUMP_LABEL +struct static_key opal_tracepoint_key = STATIC_KEY_INIT; + +int opal_tracepoint_regfunc(void) +{ + static_key_slow_inc(&opal_tracepoint_key); + return 0; +} + +void opal_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&opal_tracepoint_key); +} +#else +/* + * We optimise OPAL calls by placing opal_tracepoint_refcount + * directly in the TOC so we can check if the opal tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long opal_tracepoint_refcount; + +int opal_tracepoint_regfunc(void) +{ + opal_tracepoint_refcount++; + return 0; +} + +void opal_tracepoint_unregfunc(void) +{ + opal_tracepoint_refcount--; +} +#endif + +/* + * Since the tracing code might execute OPAL calls we need to guard against + * recursion. + */ +static DEFINE_PER_CPU(unsigned int, opal_trace_depth); + +void __trace_opal_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + preempt_disable(); + trace_opal_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +void __trace_opal_exit(long opcode, unsigned long retval) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = this_cpu_ptr(&opal_trace_depth); + + if (*depth) + goto out; + + (*depth)++; + trace_opal_exit(opcode, retval); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S new file mode 100644 index 0000000000..0ed95f7534 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PowerNV OPAL API wrappers + * + * Copyright 2011 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include + + .section ".text" + +/* + * r3-r10 - OPAL call arguments + * STK_PARAM(R11) - OPAL opcode + * STK_PARAM(R12) - MSR to restore + */ +_GLOBAL_TOC(__opal_call) + mflr r0 + std r0,PPC_LR_STKOFF(r1) + ld r12,STK_PARAM(R12)(r1) + li r0,MSR_IR|MSR_DR|MSR_LE + andc r12,r12,r0 + LOAD_REG_ADDR(r11, opal_return) + mtlr r11 + LOAD_REG_ADDR(r11, opal) + ld r2,0(r11) + ld r11,8(r11) + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 + /* set token to r0 */ + ld r0,STK_PARAM(R11)(r1) + hrfid +opal_return: + /* + * Restore MSR on OPAL return. The MSR is set to big-endian. + */ +#ifdef __BIG_ENDIAN__ + ld r11,STK_PARAM(R12)(r1) + mtmsrd r11 +#else + /* Endian can only be switched with rfi, must byte reverse MSR load */ + .short 0x4039 /* li r10,STK_PARAM(R12) */ + .byte (STK_PARAM(R12) >> 8) & 0xff + .byte STK_PARAM(R12) & 0xff + + .long 0x280c6a7d /* ldbrx r11,r10,r1 */ + .long 0x05009f42 /* bcl 20,31,$+4 */ + .long 0xa602487d /* mflr r10 */ + .long 0x14004a39 /* addi r10,r10,20 */ + .long 0xa64b5a7d /* mthsrr0 r10 */ + .long 0xa64b7b7d /* mthsrr1 r11 */ + .long 0x2402004c /* hrfid */ +#endif + LOAD_PACA_TOC() + ld r0,PPC_LR_STKOFF(r1) + mtlr r0 + blr diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c new file mode 100644 index 0000000000..748c2b97fa --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV SCOM bus debugfs interface + * + * Copyright 2010 Benjamin Herrenschmidt, IBM Corp + * + * and David Gibson, IBM Corporation. + * Copyright 2013 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static u64 opal_scom_unmangle(u64 addr) +{ + u64 tmp; + + /* + * XSCOM addresses use the top nibble to set indirect mode and + * its form. Bits 4-11 are always 0. + * + * Because the debugfs interface uses signed offsets and shifts + * the address left by 3, we basically cannot use the top 4 bits + * of the 64-bit address, and thus cannot use the indirect bit. + * + * To deal with that, we support the indirect bits being in + * bits 4-7 (IBM notation) instead of bit 0-3 in this API, we + * do the conversion here. + * + * For in-kernel use, we don't need to do this mangling. In + * kernel won't have bits 4-7 set. + * + * So: + * debugfs will always set 0-3 = 0 and clear 4-7 + * kernel will always clear 0-3 = 0 and set 4-7 + */ + tmp = addr; + tmp &= 0x0f00000000000000; + addr &= 0xf0ffffffffffffff; + addr |= tmp << 4; + + return addr; +} + +static int opal_scom_read(uint32_t chip, uint64_t addr, u64 reg, u64 *value) +{ + int64_t rc; + __be64 v; + + reg = opal_scom_unmangle(addr + reg); + rc = opal_xscom_read(chip, reg, (__be64 *)__pa(&v)); + if (rc) { + *value = 0xfffffffffffffffful; + return -EIO; + } + *value = be64_to_cpu(v); + return 0; +} + +static int opal_scom_write(uint32_t chip, uint64_t addr, u64 reg, u64 value) +{ + int64_t rc; + + reg = opal_scom_unmangle(addr + reg); + rc = opal_xscom_write(chip, reg, value); + if (rc) + return -EIO; + return 0; +} + +struct scom_debug_entry { + u32 chip; + struct debugfs_blob_wrapper path; + char name[16]; +}; + +static ssize_t scom_debug_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct scom_debug_entry *ent = filp->private_data; + u64 __user *ubuf64 = (u64 __user *)ubuf; + loff_t off = *ppos; + ssize_t done = 0; + u64 reg, reg_base, reg_cnt, val; + int rc; + + if (off < 0 || (off & 7) || (count & 7)) + return -EINVAL; + reg_base = off >> 3; + reg_cnt = count >> 3; + + for (reg = 0; reg < reg_cnt; reg++) { + rc = opal_scom_read(ent->chip, reg_base, reg, &val); + if (!rc) + rc = put_user(val, ubuf64); + if (rc) { + if (!done) + done = rc; + break; + } + ubuf64++; + *ppos += 8; + done += 8; + } + return done; +} + +static ssize_t scom_debug_write(struct file *filp, const char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct scom_debug_entry *ent = filp->private_data; + u64 __user *ubuf64 = (u64 __user *)ubuf; + loff_t off = *ppos; + ssize_t done = 0; + u64 reg, reg_base, reg_cnt, val; + int rc; + + if (off < 0 || (off & 7) || (count & 7)) + return -EINVAL; + reg_base = off >> 3; + reg_cnt = count >> 3; + + for (reg = 0; reg < reg_cnt; reg++) { + rc = get_user(val, ubuf64); + if (!rc) + rc = opal_scom_write(ent->chip, reg_base, reg, val); + if (rc) { + if (!done) + done = rc; + break; + } + ubuf64++; + done += 8; + } + return done; +} + +static const struct file_operations scom_debug_fops = { + .read = scom_debug_read, + .write = scom_debug_write, + .open = simple_open, + .llseek = default_llseek, +}; + +static int scom_debug_init_one(struct dentry *root, struct device_node *dn, + int chip) +{ + struct scom_debug_entry *ent; + struct dentry *dir; + + ent = kzalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + return -ENOMEM; + + ent->chip = chip; + snprintf(ent->name, 16, "%08x", chip); + ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn); + if (!ent->path.data) { + kfree(ent); + return -ENOMEM; + } + + ent->path.size = strlen((char *)ent->path.data); + + dir = debugfs_create_dir(ent->name, root); + if (IS_ERR(dir)) { + kfree(ent->path.data); + kfree(ent); + return -1; + } + + debugfs_create_blob("devspec", 0400, dir, &ent->path); + debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops); + + return 0; +} + +static int scom_debug_init(void) +{ + struct device_node *dn; + struct dentry *root; + int chip, rc; + + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return 0; + + root = debugfs_create_dir("scom", arch_debugfs_dir); + if (IS_ERR(root)) + return -1; + + rc = 0; + for_each_node_with_property(dn, "scom-controller") { + chip = of_get_ibm_chip_id(dn); + WARN_ON(chip == -1); + rc |= scom_debug_init_one(root, dn, chip); + } + + return rc; +} +device_initcall(scom_debug_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c new file mode 100644 index 0000000000..cdf3838f08 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal.c @@ -0,0 +1,1251 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV OPAL high level interfaces + * + * Copyright 2011 IBM Corp. + */ + +#define pr_fmt(fmt) "opal: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "powernv.h" + +#define OPAL_MSG_QUEUE_MAX 16 + +struct opal_msg_node { + struct list_head list; + struct opal_msg msg; +}; + +static DEFINE_SPINLOCK(msg_list_lock); +static LIST_HEAD(msg_list); + +/* /sys/firmware/opal */ +struct kobject *opal_kobj; + +struct opal { + u64 base; + u64 entry; + u64 size; +} opal; + +struct mcheck_recoverable_range { + u64 start_addr; + u64 end_addr; + u64 recover_addr; +}; + +static int msg_list_size; + +static struct mcheck_recoverable_range *mc_recoverable_range; +static int mc_recoverable_range_len; + +struct device_node *opal_node; +static DEFINE_SPINLOCK(opal_write_lock); +static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; +static uint32_t opal_heartbeat; +static struct task_struct *kopald_tsk; +static struct opal_msg *opal_msg; +static u32 opal_msg_size __ro_after_init; + +void __init opal_configure_cores(void) +{ + u64 reinit_flags = 0; + + /* Do the actual re-init, This will clobber all FPRs, VRs, etc... + * + * It will preserve non volatile GPRs and HSPRG0/1. It will + * also restore HIDs and other SPRs to their original value + * but it might clobber a bunch. + */ +#ifdef __BIG_ENDIAN__ + reinit_flags |= OPAL_REINIT_CPUS_HILE_BE; +#else + reinit_flags |= OPAL_REINIT_CPUS_HILE_LE; +#endif + + /* + * POWER9 always support running hash: + * ie. Host hash supports hash guests + * Host radix supports hash/radix guests + */ + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) { + reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH; + if (early_radix_enabled()) + reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX; + } + + opal_reinit_cpus(reinit_flags); + + /* Restore some bits */ + if (cur_cpu_spec->cpu_restore) + cur_cpu_spec->cpu_restore(); +} + +int __init early_init_dt_scan_opal(unsigned long node, + const char *uname, int depth, void *data) +{ + const void *basep, *entryp, *sizep; + int basesz, entrysz, runtimesz; + + if (depth != 1 || strcmp(uname, "ibm,opal") != 0) + return 0; + + basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz); + entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz); + sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz); + + if (!basep || !entryp || !sizep) + return 1; + + opal.base = of_read_number(basep, basesz/4); + opal.entry = of_read_number(entryp, entrysz/4); + opal.size = of_read_number(sizep, runtimesz/4); + + pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n", + opal.base, basep, basesz); + pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n", + opal.entry, entryp, entrysz); + pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n", + opal.size, sizep, runtimesz); + + if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) { + powerpc_firmware_features |= FW_FEATURE_OPAL; + pr_debug("OPAL detected !\n"); + } else { + panic("OPAL != V3 detected, no longer supported.\n"); + } + + return 1; +} + +int __init early_init_dt_scan_recoverable_ranges(unsigned long node, + const char *uname, int depth, void *data) +{ + int i, psize, size; + const __be32 *prop; + + if (depth != 1 || strcmp(uname, "ibm,opal") != 0) + return 0; + + prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize); + + if (!prop) + return 1; + + pr_debug("Found machine check recoverable ranges.\n"); + + /* + * Calculate number of available entries. + * + * Each recoverable address range entry is (start address, len, + * recovery address), 2 cells each for start and recovery address, + * 1 cell for len, totalling 5 cells per entry. + */ + mc_recoverable_range_len = psize / (sizeof(*prop) * 5); + + /* Sanity check */ + if (!mc_recoverable_range_len) + return 1; + + /* Size required to hold all the entries. */ + size = mc_recoverable_range_len * + sizeof(struct mcheck_recoverable_range); + + /* + * Allocate a buffer to hold the MC recoverable ranges. + */ + mc_recoverable_range = memblock_alloc(size, __alignof__(u64)); + if (!mc_recoverable_range) + panic("%s: Failed to allocate %u bytes align=0x%lx\n", + __func__, size, __alignof__(u64)); + + for (i = 0; i < mc_recoverable_range_len; i++) { + mc_recoverable_range[i].start_addr = + of_read_number(prop + (i * 5) + 0, 2); + mc_recoverable_range[i].end_addr = + mc_recoverable_range[i].start_addr + + of_read_number(prop + (i * 5) + 2, 1); + mc_recoverable_range[i].recover_addr = + of_read_number(prop + (i * 5) + 3, 2); + + pr_debug("Machine check recoverable range: %llx..%llx: %llx\n", + mc_recoverable_range[i].start_addr, + mc_recoverable_range[i].end_addr, + mc_recoverable_range[i].recover_addr); + } + return 1; +} + +static int __init opal_register_exception_handlers(void) +{ +#ifdef __BIG_ENDIAN__ + u64 glue; + + if (!(powerpc_firmware_features & FW_FEATURE_OPAL)) + return -ENODEV; + + /* Hookup some exception handlers except machine check. We use the + * fwnmi area at 0x7000 to provide the glue space to OPAL + */ + glue = 0x7000; + + /* + * Only ancient OPAL firmware requires this. + * Specifically, firmware from FW810.00 (released June 2014) + * through FW810.20 (Released October 2014). + * + * Check if we are running on newer (post Oct 2014) firmware that + * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to + * patch the HMI interrupt and we catch it directly in Linux. + * + * For older firmware (i.e < FW810.20), we fallback to old behavior and + * let OPAL patch the HMI vector and handle it inside OPAL firmware. + * + * For newer firmware we catch/handle the HMI directly in Linux. + */ + if (!opal_check_token(OPAL_HANDLE_HMI)) { + pr_info("Old firmware detected, OPAL handles HMIs.\n"); + opal_register_exception_handler( + OPAL_HYPERVISOR_MAINTENANCE_HANDLER, + 0, glue); + glue += 128; + } + + /* + * Only applicable to ancient firmware, all modern + * (post March 2015/skiboot 5.0) firmware will just return + * OPAL_UNSUPPORTED. + */ + opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); +#endif + + return 0; +} +machine_early_initcall(powernv, opal_register_exception_handlers); + +static void queue_replay_msg(void *msg) +{ + struct opal_msg_node *msg_node; + + if (msg_list_size < OPAL_MSG_QUEUE_MAX) { + msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC); + if (msg_node) { + INIT_LIST_HEAD(&msg_node->list); + memcpy(&msg_node->msg, msg, sizeof(struct opal_msg)); + list_add_tail(&msg_node->list, &msg_list); + msg_list_size++; + } else + pr_warn_once("message queue no memory\n"); + + if (msg_list_size >= OPAL_MSG_QUEUE_MAX) + pr_warn_once("message queue full\n"); + } +} + +static void dequeue_replay_msg(enum opal_msg_type msg_type) +{ + struct opal_msg_node *msg_node, *tmp; + + list_for_each_entry_safe(msg_node, tmp, &msg_list, list) { + if (be32_to_cpu(msg_node->msg.msg_type) != msg_type) + continue; + + atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], + msg_type, + &msg_node->msg); + + list_del(&msg_node->list); + kfree(msg_node); + msg_list_size--; + } +} + +/* + * Opal message notifier based on message type. Allow subscribers to get + * notified for specific messgae type. + */ +int opal_message_notifier_register(enum opal_msg_type msg_type, + struct notifier_block *nb) +{ + int ret; + unsigned long flags; + + if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) { + pr_warn("%s: Invalid arguments, msg_type:%d\n", + __func__, msg_type); + return -EINVAL; + } + + spin_lock_irqsave(&msg_list_lock, flags); + ret = atomic_notifier_chain_register( + &opal_msg_notifier_head[msg_type], nb); + + /* + * If the registration succeeded, replay any queued messages that came + * in prior to the notifier chain registration. msg_list_lock held here + * to ensure they're delivered prior to any subsequent messages. + */ + if (ret == 0) + dequeue_replay_msg(msg_type); + + spin_unlock_irqrestore(&msg_list_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(opal_message_notifier_register); + +int opal_message_notifier_unregister(enum opal_msg_type msg_type, + struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister( + &opal_msg_notifier_head[msg_type], nb); +} +EXPORT_SYMBOL_GPL(opal_message_notifier_unregister); + +static void opal_message_do_notify(uint32_t msg_type, void *msg) +{ + unsigned long flags; + bool queued = false; + + spin_lock_irqsave(&msg_list_lock, flags); + if (opal_msg_notifier_head[msg_type].head == NULL) { + /* + * Queue up the msg since no notifiers have registered + * yet for this msg_type. + */ + queue_replay_msg(msg); + queued = true; + } + spin_unlock_irqrestore(&msg_list_lock, flags); + + if (queued) + return; + + /* notify subscribers */ + atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type], + msg_type, msg); +} + +static void opal_handle_message(void) +{ + s64 ret; + u32 type; + + ret = opal_get_msg(__pa(opal_msg), opal_msg_size); + /* No opal message pending. */ + if (ret == OPAL_RESOURCE) + return; + + /* check for errors. */ + if (ret) { + pr_warn("%s: Failed to retrieve opal message, err=%lld\n", + __func__, ret); + return; + } + + type = be32_to_cpu(opal_msg->msg_type); + + /* Sanity check */ + if (type >= OPAL_MSG_TYPE_MAX) { + pr_warn_once("%s: Unknown message type: %u\n", __func__, type); + return; + } + opal_message_do_notify(type, (void *)opal_msg); +} + +static irqreturn_t opal_message_notify(int irq, void *data) +{ + opal_handle_message(); + return IRQ_HANDLED; +} + +static int __init opal_message_init(struct device_node *opal_node) +{ + int ret, i, irq; + + ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size); + if (ret) { + pr_notice("Failed to read opal-msg-size property\n"); + opal_msg_size = sizeof(struct opal_msg); + } + + opal_msg = kmalloc(opal_msg_size, GFP_KERNEL); + if (!opal_msg) { + opal_msg_size = sizeof(struct opal_msg); + /* Try to allocate fixed message size */ + opal_msg = kmalloc(opal_msg_size, GFP_KERNEL); + BUG_ON(opal_msg == NULL); + } + + for (i = 0; i < OPAL_MSG_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]); + + irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING)); + if (!irq) { + pr_err("%s: Can't register OPAL event irq (%d)\n", + __func__, irq); + return irq; + } + + ret = request_irq(irq, opal_message_notify, + IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL); + if (ret) { + pr_err("%s: Can't request OPAL event irq (%d)\n", + __func__, ret); + return ret; + } + + return 0; +} + +int opal_get_chars(uint32_t vtermno, char *buf, int count) +{ + s64 rc; + __be64 evt, len; + + if (!opal.entry) + return -ENODEV; + opal_poll_events(&evt); + if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0) + return 0; + len = cpu_to_be64(count); + rc = opal_console_read(vtermno, &len, buf); + if (rc == OPAL_SUCCESS) + return be64_to_cpu(len); + return 0; +} + +static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic) +{ + unsigned long flags = 0 /* shut up gcc */; + int written; + __be64 olen; + s64 rc; + + if (!opal.entry) + return -ENODEV; + + if (atomic) + spin_lock_irqsave(&opal_write_lock, flags); + rc = opal_console_write_buffer_space(vtermno, &olen); + if (rc || be64_to_cpu(olen) < total_len) { + /* Closed -> drop characters */ + if (rc) + written = total_len; + else + written = -EAGAIN; + goto out; + } + + /* Should not get a partial write here because space is available. */ + olen = cpu_to_be64(total_len); + rc = opal_console_write(vtermno, &olen, data); + if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + written = -EAGAIN; + goto out; + } + + /* Closed or other error drop */ + if (rc != OPAL_SUCCESS) { + written = opal_error_code(rc); + goto out; + } + + written = be64_to_cpu(olen); + if (written < total_len) { + if (atomic) { + /* Should not happen */ + pr_warn("atomic console write returned partial " + "len=%d written=%d\n", total_len, written); + } + if (!written) + written = -EAGAIN; + } + +out: + if (atomic) + spin_unlock_irqrestore(&opal_write_lock, flags); + + return written; +} + +int opal_put_chars(uint32_t vtermno, const char *data, int total_len) +{ + return __opal_put_chars(vtermno, data, total_len, false); +} + +/* + * opal_put_chars_atomic will not perform partial-writes. Data will be + * atomically written to the terminal or not at all. This is not strictly + * true at the moment because console space can race with OPAL's console + * writes. + */ +int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len) +{ + return __opal_put_chars(vtermno, data, total_len, true); +} + +static s64 __opal_flush_console(uint32_t vtermno) +{ + s64 rc; + + if (!opal_check_token(OPAL_CONSOLE_FLUSH)) { + __be64 evt; + + /* + * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, + * the console can still be flushed by calling the polling + * function while it has OPAL_EVENT_CONSOLE_OUTPUT events. + */ + WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n"); + + opal_poll_events(&evt); + if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)) + return OPAL_SUCCESS; + return OPAL_BUSY; + + } else { + rc = opal_console_flush(vtermno); + if (rc == OPAL_BUSY_EVENT) { + opal_poll_events(NULL); + rc = OPAL_BUSY; + } + return rc; + } + +} + +/* + * opal_flush_console spins until the console is flushed + */ +int opal_flush_console(uint32_t vtermno) +{ + for (;;) { + s64 rc = __opal_flush_console(vtermno); + + if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) { + mdelay(1); + continue; + } + + return opal_error_code(rc); + } +} + +/* + * opal_flush_chars is an hvc interface that sleeps until the console is + * flushed if wait, otherwise it will return -EBUSY if the console has data, + * -EAGAIN if it has data and some of it was flushed. + */ +int opal_flush_chars(uint32_t vtermno, bool wait) +{ + for (;;) { + s64 rc = __opal_flush_console(vtermno); + + if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) { + if (wait) { + msleep(OPAL_BUSY_DELAY_MS); + continue; + } + if (rc == OPAL_PARTIAL) + return -EAGAIN; + } + + return opal_error_code(rc); + } +} + +static int opal_recover_mce(struct pt_regs *regs, + struct machine_check_event *evt) +{ + int recovered = 0; + + if (regs_is_unrecoverable(regs)) { + /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); + recovered = 0; + } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { + /* Platform corrected itself */ + recovered = 1; + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } + + if (!recovered && evt->sync_error) { + /* + * Try to kill processes if we get a synchronous machine check + * (e.g., one caused by execution of this instruction). This + * will devolve into a panic if we try to kill init or are in + * an interrupt etc. + * + * TODO: Queue up this address for hwpoisioning later. + * TODO: This is not quite right for d-side machine + * checks ->nip is not necessarily the important + * address. + */ + if ((user_mode(regs))) { + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } else if (die_will_crash()) { + /* + * die() would kill the kernel, so better to go via + * the platform reboot code that will log the + * machine check. + */ + recovered = 0; + } else { + die_mce("Machine check", regs, SIGBUS); + recovered = 1; + } + } + + return recovered; +} + +void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) +{ + panic_flush_kmsg_start(); + + pr_emerg("Hardware platform error: %s\n", msg); + if (regs) + show_regs(regs); + smp_send_stop(); + + panic_flush_kmsg_end(); + + /* + * Don't bother to shut things down because this will + * xstop the system. + */ + if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg) + == OPAL_UNSUPPORTED) { + pr_emerg("Reboot type %d not supported for %s\n", + OPAL_REBOOT_PLATFORM_ERROR, msg); + } + + /* + * We reached here. There can be three possibilities: + * 1. We are running on a firmware level that do not support + * opal_cec_reboot2() + * 2. We are running on a firmware level that do not support + * OPAL_REBOOT_PLATFORM_ERROR reboot type. + * 3. We are running on FSP based system that does not need + * opal to trigger checkstop explicitly for error analysis. + * The FSP PRD component would have already got notified + * about this error through other channels. + * 4. We are running on a newer skiboot that by default does + * not cause a checkstop, drops us back to the kernel to + * extract context and state at the time of the error. + */ + + panic(msg); +} + +int opal_machine_check(struct pt_regs *regs) +{ + struct machine_check_event evt; + + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return 0; + + /* Print things out */ + if (evt.version != MCE_V1) { + pr_err("Machine Check Exception, Unknown event version %d !\n", + evt.version); + return 0; + } + machine_check_print_event_info(&evt, user_mode(regs), false); + + if (opal_recover_mce(regs, &evt)) + return 1; + + pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception"); +} + +/* Early hmi handler called in real mode. */ +int opal_hmi_exception_early(struct pt_regs *regs) +{ + s64 rc; + + /* + * call opal hmi handler. Pass paca address as token. + * The return value OPAL_SUCCESS is an indication that there is + * an HMI event generated waiting to pull by Linux. + */ + rc = opal_handle_hmi(); + if (rc == OPAL_SUCCESS) { + local_paca->hmi_event_available = 1; + return 1; + } + return 0; +} + +int opal_hmi_exception_early2(struct pt_regs *regs) +{ + s64 rc; + __be64 out_flags; + + /* + * call opal hmi handler. + * Check 64-bit flag mask to find out if an event was generated, + * and whether TB is still valid or not etc. + */ + rc = opal_handle_hmi2(&out_flags); + if (rc != OPAL_SUCCESS) + return 0; + + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT) + local_paca->hmi_event_available = 1; + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL) + tb_invalid = true; + return 1; +} + +/* HMI exception handler called in virtual mode when irqs are next enabled. */ +int opal_handle_hmi_exception(struct pt_regs *regs) +{ + /* + * Check if HMI event is available. + * if Yes, then wake kopald to process them. + */ + if (!local_paca->hmi_event_available) + return 0; + + local_paca->hmi_event_available = 0; + opal_wake_poller(); + + return 1; +} + +static uint64_t find_recovery_address(uint64_t nip) +{ + int i; + + for (i = 0; i < mc_recoverable_range_len; i++) + if ((nip >= mc_recoverable_range[i].start_addr) && + (nip < mc_recoverable_range[i].end_addr)) + return mc_recoverable_range[i].recover_addr; + return 0; +} + +bool opal_mce_check_early_recovery(struct pt_regs *regs) +{ + uint64_t recover_addr = 0; + + if (!opal.base || !opal.size) + goto out; + + if ((regs->nip >= opal.base) && + (regs->nip < (opal.base + opal.size))) + recover_addr = find_recovery_address(regs->nip); + + /* + * Setup regs->nip to rfi into fixup address. + */ + if (recover_addr) + regs_set_return_ip(regs, recover_addr); + +out: + return !!recover_addr; +} + +static int __init opal_sysfs_init(void) +{ + opal_kobj = kobject_create_and_add("opal", firmware_kobj); + if (!opal_kobj) { + pr_warn("kobject_create_and_add opal failed\n"); + return -ENOMEM; + } + + return 0; +} + +static ssize_t export_attr_read(struct file *fp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, bin_attr->private, + bin_attr->size); +} + +static int opal_add_one_export(struct kobject *parent, const char *export_name, + struct device_node *np, const char *prop_name) +{ + struct bin_attribute *attr = NULL; + const char *name = NULL; + u64 vals[2]; + int rc; + + rc = of_property_read_u64_array(np, prop_name, &vals[0], 2); + if (rc) + goto out; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) { + rc = -ENOMEM; + goto out; + } + name = kstrdup(export_name, GFP_KERNEL); + if (!name) { + rc = -ENOMEM; + goto out; + } + + sysfs_bin_attr_init(attr); + attr->attr.name = name; + attr->attr.mode = 0400; + attr->read = export_attr_read; + attr->private = __va(vals[0]); + attr->size = vals[1]; + + rc = sysfs_create_bin_file(parent, attr); +out: + if (rc) { + kfree(name); + kfree(attr); + } + + return rc; +} + +static void opal_add_exported_attrs(struct device_node *np, + struct kobject *kobj) +{ + struct device_node *child; + struct property *prop; + + for_each_property_of_node(np, prop) { + int rc; + + if (!strcmp(prop->name, "name") || + !strcmp(prop->name, "phandle")) + continue; + + rc = opal_add_one_export(kobj, prop->name, np, prop->name); + if (rc) { + pr_warn("Unable to add export %pOF/%s, rc = %d!\n", + np, prop->name, rc); + } + } + + for_each_child_of_node(np, child) { + struct kobject *child_kobj; + + child_kobj = kobject_create_and_add(child->name, kobj); + if (!child_kobj) { + pr_err("Unable to create export dir for %pOF\n", child); + continue; + } + + opal_add_exported_attrs(child, child_kobj); + } +} + +/* + * opal_export_attrs: creates a sysfs node for each property listed in + * the device-tree under /ibm,opal/firmware/exports/ + * All new sysfs nodes are created under /opal/exports/. + * This allows for reserved memory regions (e.g. HDAT) to be read. + * The new sysfs nodes are only readable by root. + */ +static void opal_export_attrs(void) +{ + struct device_node *np; + struct kobject *kobj; + int rc; + + np = of_find_node_by_path("/ibm,opal/firmware/exports"); + if (!np) + return; + + /* Create new 'exports' directory - /sys/firmware/opal/exports */ + kobj = kobject_create_and_add("exports", opal_kobj); + if (!kobj) { + pr_warn("kobject_create_and_add() of exports failed\n"); + of_node_put(np); + return; + } + + opal_add_exported_attrs(np, kobj); + + /* + * NB: symbol_map existed before the generic export interface so it + * lives under the top level opal_kobj. + */ + rc = opal_add_one_export(opal_kobj, "symbol_map", + np->parent, "symbol-map"); + if (rc) + pr_warn("Error %d creating OPAL symbols file\n", rc); + + of_node_put(np); +} + +static void __init opal_dump_region_init(void) +{ + void *addr; + uint64_t size; + int rc; + + if (!opal_check_token(OPAL_REGISTER_DUMP_REGION)) + return; + + /* Register kernel log buffer */ + addr = log_buf_addr_get(); + if (addr == NULL) + return; + + size = log_buf_len_get(); + if (size == 0) + return; + + rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, + __pa(addr), size); + /* Don't warn if this is just an older OPAL that doesn't + * know about that call + */ + if (rc && rc != OPAL_UNSUPPORTED) + pr_warn("DUMP: Failed to register kernel log buffer. " + "rc = %d\n", rc); +} + +static void __init opal_pdev_init(const char *compatible) +{ + struct device_node *np; + + for_each_compatible_node(np, NULL, compatible) + of_platform_device_create(np, NULL, NULL); +} + +static void __init opal_imc_init_dev(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT); + if (np) + of_platform_device_create(np, NULL, NULL); + + of_node_put(np); +} + +static int kopald(void *unused) +{ + unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1; + + set_freezable(); + do { + try_to_freeze(); + + opal_handle_events(); + + set_current_state(TASK_INTERRUPTIBLE); + if (opal_have_pending_events()) + __set_current_state(TASK_RUNNING); + else + schedule_timeout(timeout); + + } while (!kthread_should_stop()); + + return 0; +} + +void opal_wake_poller(void) +{ + if (kopald_tsk) + wake_up_process(kopald_tsk); +} + +static void __init opal_init_heartbeat(void) +{ + /* Old firwmware, we assume the HVC heartbeat is sufficient */ + if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", + &opal_heartbeat) != 0) + opal_heartbeat = 0; + + if (opal_heartbeat) + kopald_tsk = kthread_run(kopald, NULL, "kopald"); +} + +static int __init opal_init(void) +{ + struct device_node *np, *consoles, *leds; + int rc; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_warn("Device node not found\n"); + return -ENODEV; + } + + /* Register OPAL consoles if any ports */ + consoles = of_find_node_by_path("/ibm,opal/consoles"); + if (consoles) { + for_each_child_of_node(consoles, np) { + if (!of_node_name_eq(np, "serial")) + continue; + of_platform_device_create(np, NULL, NULL); + } + of_node_put(consoles); + } + + /* Initialise OPAL messaging system */ + opal_message_init(opal_node); + + /* Initialise OPAL asynchronous completion interface */ + opal_async_comp_init(); + + /* Initialise OPAL sensor interface */ + opal_sensor_init(); + + /* Initialise OPAL hypervisor maintainence interrupt handling */ + opal_hmi_handler_init(); + + /* Create i2c platform devices */ + opal_pdev_init("ibm,opal-i2c"); + + /* Handle non-volatile memory devices */ + opal_pdev_init("pmem-region"); + + /* Setup a heatbeat thread if requested by OPAL */ + opal_init_heartbeat(); + + /* Detect In-Memory Collection counters and create devices*/ + opal_imc_init_dev(); + + /* Create leds platform devices */ + leds = of_find_node_by_path("/ibm,opal/leds"); + if (leds) { + of_platform_device_create(leds, "opal_leds", NULL); + of_node_put(leds); + } + + /* Initialise OPAL message log interface */ + opal_msglog_init(); + + /* Create "opal" kobject under /sys/firmware */ + rc = opal_sysfs_init(); + if (rc == 0) { + /* Setup dump region interface */ + opal_dump_region_init(); + /* Setup error log interface */ + rc = opal_elog_init(); + /* Setup code update interface */ + opal_flash_update_init(); + /* Setup platform dump extract interface */ + opal_platform_dump_init(); + /* Setup system parameters interface */ + opal_sys_param_init(); + /* Setup message log sysfs interface. */ + opal_msglog_sysfs_init(); + /* Add all export properties*/ + opal_export_attrs(); + } + + /* Initialize platform devices: IPMI backend, PRD & flash interface */ + opal_pdev_init("ibm,opal-ipmi"); + opal_pdev_init("ibm,opal-flash"); + opal_pdev_init("ibm,opal-prd"); + + /* Initialise platform device: oppanel interface */ + opal_pdev_init("ibm,opal-oppanel"); + + /* Initialise OPAL kmsg dumper for flushing console on panic */ + opal_kmsg_init(); + + /* Initialise OPAL powercap interface */ + opal_powercap_init(); + + /* Initialise OPAL Power-Shifting-Ratio interface */ + opal_psr_init(); + + /* Initialise OPAL sensor groups */ + opal_sensor_groups_init(); + + /* Initialise OPAL Power control interface */ + opal_power_control_init(); + + /* Initialize OPAL secure variables */ + opal_pdev_init("ibm,secvar-backend"); + + return 0; +} +machine_subsys_initcall(powernv, opal_init); + +void opal_shutdown(void) +{ + long rc = OPAL_BUSY; + + opal_event_shutdown(); + + /* + * Then sync with OPAL which ensure anything that can + * potentially write to our memory has completed such + * as an ongoing dump retrieval + */ + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_sync_host_reboot(); + if (rc == OPAL_BUSY) + opal_poll_events(NULL); + else + mdelay(10); + } + + /* Unregister memory dump region */ + if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION)) + opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF); +} + +/* Export this so that test modules can use it */ +EXPORT_SYMBOL_GPL(opal_invalid_call); +EXPORT_SYMBOL_GPL(opal_xscom_read); +EXPORT_SYMBOL_GPL(opal_xscom_write); +EXPORT_SYMBOL_GPL(opal_ipmi_send); +EXPORT_SYMBOL_GPL(opal_ipmi_recv); +EXPORT_SYMBOL_GPL(opal_flash_read); +EXPORT_SYMBOL_GPL(opal_flash_write); +EXPORT_SYMBOL_GPL(opal_flash_erase); +EXPORT_SYMBOL_GPL(opal_prd_msg); +EXPORT_SYMBOL_GPL(opal_check_token); + +/* Convert a region of vmalloc memory to an opal sg list */ +struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, + unsigned long vmalloc_size) +{ + struct opal_sg_list *sg, *first = NULL; + unsigned long i = 0; + + sg = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sg) + goto nomem; + + first = sg; + + while (vmalloc_size > 0) { + uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT; + uint64_t length = min(vmalloc_size, PAGE_SIZE); + + sg->entry[i].data = cpu_to_be64(data); + sg->entry[i].length = cpu_to_be64(length); + i++; + + if (i >= SG_ENTRIES_PER_NODE) { + struct opal_sg_list *next; + + next = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!next) + goto nomem; + + sg->length = cpu_to_be64( + i * sizeof(struct opal_sg_entry) + 16); + i = 0; + sg->next = cpu_to_be64(__pa(next)); + sg = next; + } + + vmalloc_addr += length; + vmalloc_size -= length; + } + + sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16); + + return first; + +nomem: + pr_err("%s : Failed to allocate memory\n", __func__); + opal_free_sg_list(first); + return NULL; +} + +void opal_free_sg_list(struct opal_sg_list *sg) +{ + while (sg) { + uint64_t next = be64_to_cpu(sg->next); + + kfree(sg); + + if (next) + sg = __va(next); + else + sg = NULL; + } +} + +int opal_error_code(int rc) +{ + switch (rc) { + case OPAL_SUCCESS: return 0; + + case OPAL_PARAMETER: return -EINVAL; + case OPAL_ASYNC_COMPLETION: return -EINPROGRESS; + case OPAL_BUSY: + case OPAL_BUSY_EVENT: return -EBUSY; + case OPAL_NO_MEM: return -ENOMEM; + case OPAL_PERMISSION: return -EPERM; + + case OPAL_UNSUPPORTED: return -EIO; + case OPAL_HARDWARE: return -EIO; + case OPAL_INTERNAL_ERROR: return -EIO; + case OPAL_TIMEOUT: return -ETIMEDOUT; + default: + pr_err("%s: unexpected OPAL error %d\n", __func__, rc); + return -EIO; + } +} + +void powernv_set_nmmu_ptcr(unsigned long ptcr) +{ + int rc; + + if (firmware_has_feature(FW_FEATURE_OPAL)) { + rc = opal_nmmu_set_ptcr(-1UL, ptcr); + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) + pr_warn("%s: Unable to set nest mmu ptcr\n", __func__); + } +} + +EXPORT_SYMBOL_GPL(opal_poll_events); +EXPORT_SYMBOL_GPL(opal_rtc_read); +EXPORT_SYMBOL_GPL(opal_rtc_write); +EXPORT_SYMBOL_GPL(opal_tpo_read); +EXPORT_SYMBOL_GPL(opal_tpo_write); +EXPORT_SYMBOL_GPL(opal_i2c_request); +/* Export these symbols for PowerNV LED class driver */ +EXPORT_SYMBOL_GPL(opal_leds_get_ind); +EXPORT_SYMBOL_GPL(opal_leds_set_ind); +/* Export this symbol for PowerNV Operator Panel class driver */ +EXPORT_SYMBOL_GPL(opal_write_oppanel_async); +/* Export this for KVM */ +EXPORT_SYMBOL_GPL(opal_int_set_mfrr); +EXPORT_SYMBOL_GPL(opal_int_eoi); +EXPORT_SYMBOL_GPL(opal_error_code); +/* Export the below symbol for NX compression */ +EXPORT_SYMBOL(opal_nx_coproc_init); diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c new file mode 100644 index 0000000000..7e419de71d --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2014-2016 IBM Corp. + */ + +#include +#include +#include +#include + +#include "pci.h" + +int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + int rc; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + pe_info(pe, "Switching PHB to CXL\n"); + + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); + if (rc == OPAL_UNSUPPORTED) + dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n"); + else if (rc) + dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); + + return rc; +} +EXPORT_SYMBOL(pnv_phb_to_cxl_mode); + +/* Find PHB for cxl dev and allocate MSI hwirqs? + * Returns the absolute hardware IRQ number + */ +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); + + if (hwirq < 0) { + dev_warn(&dev->dev, "Failed to find a free MSI\n"); + return -ENOSPC; + } + + return phb->msi_base + hwirq; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); + +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); +} +EXPORT_SYMBOL(pnv_cxl_release_hwirqs); + +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq; + + for (i = 1; i < CXL_IRQ_RANGES; i++) { + if (!irqs->range[i]) + continue; + pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", + i, irqs->offset[i], + irqs->range[i]); + hwirq = irqs->offset[i] - phb->msi_base; + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, + irqs->range[i]); + } +} +EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); + +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq, try; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + /* 0 is reserved for the multiplexed PSL DSI interrupt */ + for (i = 1; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); + if (hwirq >= 0) + break; + try /= 2; + } + if (!try) + goto fail; + + irqs->offset[i] = phb->msi_base + hwirq; + irqs->range[i] = try; + pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", + i, irqs->offset[i], irqs->range[i]); + num -= try; + } + if (num) + goto fail; + + return 0; +fail: + pnv_cxl_release_hwirq_ranges(irqs, dev); + return -ENOSPC; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); + +int pnv_cxl_get_irq_count(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return phb->msi_bmp.irq_count; +} +EXPORT_SYMBOL(pnv_cxl_get_irq_count); + +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + unsigned int xive_num = hwirq - phb->msi_base; + struct pnv_ioda_pe *pe; + int rc; + + if (!(pe = pnv_ioda_get_pe(dev))) + return -ENODEV; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " + "hwirq 0x%x XIVE 0x%x PE\n", + pci_name(dev), rc, phb->msi_base, hwirq, xive_num); + return -EIO; + } + pnv_set_msi_irq_chip(phb, virq); + + return 0; +} +EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c new file mode 100644 index 0000000000..e96324502d --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * TCE helpers for IODA PCI/PCIe on PowerNV platforms + * + * Copyright 2018 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +#include +#include +#include "pci.h" + +unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + unsigned long mask = 0; + int i, rc, count; + u32 val; + + count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); + if (count <= 0) { + mask = SZ_4K | SZ_64K; + /* Add 16M for POWER8 by default */ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_ARCH_300)) + mask |= SZ_16M | SZ_256M; + return mask; + } + + for (i = 0; i < count; i++) { + rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", + i, &val); + if (rc == 0) + mask |= 1ULL << val; + } + + return mask; +} + +void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned int page_shift) +{ + tbl->it_blocksize = 16; + tbl->it_base = (unsigned long)tce_mem; + tbl->it_page_shift = page_shift; + tbl->it_offset = dma_offset >> tbl->it_page_shift; + tbl->it_index = 0; + tbl->it_size = tce_size >> 3; + tbl->it_busno = 0; + tbl->it_type = TCE_PCI; +} + +static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) +{ + struct page *tce_mem = NULL; + __be64 *addr; + + tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN, + shift - PAGE_SHIFT); + if (!tce_mem) { + pr_err("Failed to allocate a TCE memory, level shift=%d\n", + shift); + return NULL; + } + addr = page_address(tce_mem); + memset(addr, 0, 1UL << shift); + + return addr; +} + +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned int levels); + +static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) +{ + __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; + int level = tbl->it_indirect_levels; + const long shift = ilog2(tbl->it_level_size); + unsigned long mask = (tbl->it_level_size - 1) << (level * shift); + + while (level) { + int n = (idx & mask) >> (level * shift); + unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n])); + + if (!tce) { + __be64 *tmp2; + + if (!alloc) + return NULL; + + tmp2 = pnv_alloc_tce_level(tbl->it_nid, + ilog2(tbl->it_level_size) + 3); + if (!tmp2) + return NULL; + + tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE; + oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0, + cpu_to_be64(tce))); + if (oldtce) { + pnv_pci_ioda2_table_do_free_pages(tmp2, + ilog2(tbl->it_level_size) + 3, 1); + tce = oldtce; + } + } + + tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); + idx &= ~mask; + mask >>= shift; + --level; + } + + return tmp + idx; +} + +int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs) +{ + u64 proto_tce = iommu_direction_to_tce_perm(direction); + u64 rpn = __pa(uaddr) >> tbl->it_page_shift; + long i; + + if (proto_tce & TCE_PCI_WRITE) + proto_tce |= TCE_PCI_READ; + + for (i = 0; i < npages; i++) { + unsigned long newtce = proto_tce | + ((rpn + i) << tbl->it_page_shift); + unsigned long idx = index - tbl->it_offset + i; + + *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); + } + + return 0; +} + +#ifdef CONFIG_IOMMU_API +int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *hpa | proto_tce, oldtce; + unsigned long idx = index - tbl->it_offset; + __be64 *ptce = NULL; + + BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); + + if (*direction == DMA_NONE) { + ptce = pnv_tce(tbl, false, idx, false); + if (!ptce) { + *hpa = 0; + return 0; + } + } + + if (!ptce) { + ptce = pnv_tce(tbl, false, idx, true); + if (!ptce) + return -ENOMEM; + } + + if (newtce & TCE_PCI_WRITE) + newtce |= TCE_PCI_READ; + + oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); + *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); + *direction = iommu_tce_direction(oldtce); + + return 0; +} + +__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) +{ + if (WARN_ON_ONCE(!tbl->it_userspace)) + return NULL; + + return pnv_tce(tbl, true, index - tbl->it_offset, alloc); +} +#endif + +void pnv_tce_free(struct iommu_table *tbl, long index, long npages) +{ + long i; + + for (i = 0; i < npages; i++) { + unsigned long idx = index - tbl->it_offset + i; + __be64 *ptce = pnv_tce(tbl, false, idx, false); + + if (ptce) + *ptce = cpu_to_be64(0); + else + /* Skip the rest of the level */ + i |= tbl->it_level_size - 1; + } +} + +unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +{ + __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); + + if (!ptce) + return 0; + + return be64_to_cpu(*ptce); +} + +static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, + unsigned long size, unsigned int levels) +{ + const unsigned long addr_ul = (unsigned long) addr & + ~(TCE_PCI_READ | TCE_PCI_WRITE); + + if (levels) { + long i; + u64 *tmp = (u64 *) addr_ul; + + for (i = 0; i < size; ++i) { + unsigned long hpa = be64_to_cpu(tmp[i]); + + if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) + continue; + + pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, + levels - 1); + } + } + + free_pages(addr_ul, get_order(size << 3)); +} + +void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) +{ + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + + if (!tbl->it_size) + return; + + pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, + tbl->it_indirect_levels); + if (tbl->it_userspace) { + pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, + tbl->it_indirect_levels); + } +} + +static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, + unsigned int levels, unsigned long limit, + unsigned long *current_offset, unsigned long *total_allocated) +{ + __be64 *addr, *tmp; + unsigned long allocated = 1UL << shift; + unsigned int entries = 1UL << (shift - 3); + long i; + + addr = pnv_alloc_tce_level(nid, shift); + *total_allocated += allocated; + + --levels; + if (!levels) { + *current_offset += allocated; + return addr; + } + + for (i = 0; i < entries; ++i) { + tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, + levels, limit, current_offset, total_allocated); + if (!tmp) + break; + + addr[i] = cpu_to_be64(__pa(tmp) | + TCE_PCI_READ | TCE_PCI_WRITE); + + if (*current_offset >= limit) + break; + } + + return addr; +} + +long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + bool alloc_userspace_copy, struct iommu_table *tbl) +{ + void *addr, *uas = NULL; + unsigned long offset = 0, level_shift, total_allocated = 0; + unsigned long total_allocated_uas = 0; + const unsigned int window_shift = ilog2(window_size); + unsigned int entries_shift = window_shift - page_shift; + unsigned int table_shift = max_t(unsigned int, entries_shift + 3, + PAGE_SHIFT); + const unsigned long tce_table_size = 1UL << table_shift; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) + return -EINVAL; + + if (!is_power_of_2(window_size)) + return -EINVAL; + + /* Adjust direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + level_shift = entries_shift + 3; + level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); + + if ((level_shift - 3) * levels + page_shift >= 55) + return -EINVAL; + + /* Allocate TCE table */ + addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + 1, tce_table_size, &offset, &total_allocated); + + /* addr==NULL means that the first level allocation failed */ + if (!addr) + return -ENOMEM; + + /* + * First level was allocated but some lower level failed as + * we did not allocate as much as we wanted, + * release partially allocated table. + */ + if (levels == 1 && offset < tce_table_size) + goto free_tces_exit; + + /* Allocate userspace view of the TCE table */ + if (alloc_userspace_copy) { + offset = 0; + uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, + 1, tce_table_size, &offset, + &total_allocated_uas); + if (!uas) + goto free_tces_exit; + if (levels == 1 && (offset < tce_table_size || + total_allocated_uas != total_allocated)) + goto free_uas_exit; + } + + /* Setup linux iommu table */ + pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, + page_shift); + tbl->it_level_size = 1ULL << (level_shift - 3); + tbl->it_indirect_levels = levels - 1; + tbl->it_userspace = uas; + tbl->it_nid = nid; + + pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", + window_size, tce_table_size, bus_offset, tbl->it_base, + tbl->it_userspace, 1, levels); + + return 0; + +free_uas_exit: + pnv_pci_ioda2_table_do_free_pages(uas, + 1ULL << (level_shift - 3), levels - 1); +free_tces_exit: + pnv_pci_ioda2_table_do_free_pages(addr, + 1ULL << (level_shift - 3), levels - 1); + + return -ENOMEM; +} + +void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + long i; + bool found; + struct iommu_table_group_link *tgl; + + if (!tbl || !table_group) + return; + + /* Remove link to a group from table's list of attached groups */ + found = false; + + rcu_read_lock(); + list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { + if (tgl->table_group == table_group) { + list_del_rcu(&tgl->next); + kfree_rcu(tgl, rcu); + found = true; + break; + } + } + rcu_read_unlock(); + + if (WARN_ON(!found)) + return; + + /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ + found = false; + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] == tbl) { + iommu_tce_table_put(tbl); + table_group->tables[i] = NULL; + found = true; + break; + } + } + WARN_ON(!found); +} + +long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group) +{ + struct iommu_table_group_link *tgl = NULL; + + if (WARN_ON(!tbl || !table_group)) + return -EINVAL; + + tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, + node); + if (!tgl) + return -ENOMEM; + + tgl->table_group = table_group; + list_add_rcu(&tgl->next, &tbl->it_group_list); + + table_group->tables[num] = iommu_tce_table_get(tbl); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c new file mode 100644 index 0000000000..28fac47700 --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -0,0 +1,2827 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support PCI/PCIe on PowerNV platforms + * + * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "powernv.h" +#include "pci.h" +#include "../../../../drivers/pci/pci.h" + +/* This array is indexed with enum pnv_phb_type */ +static const char * const pnv_phb_names[] = { "IODA2", "NPU_OCAPI" }; + +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); +static void pnv_pci_configure_bus(struct pci_bus *bus); + +void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + char pfix[32]; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + if (pe->flags & PNV_IODA_PE_DEV) + strscpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) + sprintf(pfix, "%04x:%02x ", + pci_domain_nr(pe->pbus), pe->pbus->number); +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + sprintf(pfix, "%04x:%02x:%2x.%d", + pci_domain_nr(pe->parent_dev->bus), + (pe->rid & 0xff00) >> 8, + PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); +#endif /* CONFIG_PCI_IOV*/ + + printk("%spci %s: [PE# %.2x] %pV", + level, pfix, pe->pe_number, &vaf); + + va_end(args); +} + +static bool pnv_iommu_bypass_disabled __read_mostly; +static bool pci_reset_phbs __read_mostly; + +static int __init iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + + while (*str) { + if (!strncmp(str, "nobypass", 8)) { + pnv_iommu_bypass_disabled = true; + pr_info("PowerNV: IOMMU bypass window disabled.\n"); + break; + } + str += strcspn(str, ","); + if (*str == ',') + str++; + } + + return 0; +} +early_param("iommu", iommu_setup); + +static int __init pci_reset_phbs_setup(char *str) +{ + pci_reset_phbs = true; + return 0; +} + +early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup); + +static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) +{ + s64 rc; + + phb->ioda.pe_array[pe_no].phb = phb; + phb->ioda.pe_array[pe_no].pe_number = pe_no; + phb->ioda.pe_array[pe_no].dma_setup_done = false; + + /* + * Clear the PE frozen state as it might be put into frozen state + * in the last PCI remove path. It's not harmful to do so when the + * PE is already in unfrozen state. + */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) + pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, pe_no); + + return &phb->ioda.pe_array[pe_no]; +} + +static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) +{ + if (!(pe_no >= 0 && pe_no < phb->ioda.total_pe_num)) { + pr_warn("%s: Invalid PE %x on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + return; + } + + mutex_lock(&phb->ioda.pe_alloc_mutex); + if (test_and_set_bit(pe_no, phb->ioda.pe_alloc)) + pr_debug("%s: PE %x was reserved on PHB#%x\n", + __func__, pe_no, phb->hose->global_number); + mutex_unlock(&phb->ioda.pe_alloc_mutex); + + pnv_ioda_init_pe(phb, pe_no); +} + +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count) +{ + struct pnv_ioda_pe *ret = NULL; + int run = 0, pe, i; + + mutex_lock(&phb->ioda.pe_alloc_mutex); + + /* scan backwards for a run of @count cleared bits */ + for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { + if (test_bit(pe, phb->ioda.pe_alloc)) { + run = 0; + continue; + } + + run++; + if (run == count) + break; + } + if (run != count) + goto out; + + for (i = pe; i < pe + count; i++) { + set_bit(i, phb->ioda.pe_alloc); + pnv_ioda_init_pe(phb, i); + } + ret = &phb->ioda.pe_array[pe]; + +out: + mutex_unlock(&phb->ioda.pe_alloc_mutex); + return ret; +} + +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + unsigned int pe_num = pe->pe_number; + + WARN_ON(pe->pdev); + memset(pe, 0, sizeof(struct pnv_ioda_pe)); + + mutex_lock(&phb->ioda.pe_alloc_mutex); + clear_bit(pe_num, phb->ioda.pe_alloc); + mutex_unlock(&phb->ioda.pe_alloc_mutex); +} + +/* The default M64 BAR is shared by all PEs */ +static int pnv_ioda2_init_m64(struct pnv_phb *phb) +{ + const char *desc; + struct resource *r; + s64 rc; + + /* Configure the default M64 BAR */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + phb->ioda.m64_base, + 0, /* unused */ + phb->ioda.m64_size); + if (rc != OPAL_SUCCESS) { + desc = "configuring"; + goto fail; + } + + /* Enable the default M64 BAR */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_ENABLE_M64_SPLIT); + if (rc != OPAL_SUCCESS) { + desc = "enabling"; + goto fail; + } + + /* + * Exclude the segments for reserved and root bus PE, which + * are first or last two PEs. + */ + r = &phb->hose->mem_resources[1]; + if (phb->ioda.reserved_pe_idx == 0) + r->start += (2 * phb->ioda.m64_segsize); + else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) + r->end -= (2 * phb->ioda.m64_segsize); + else + pr_warn(" Cannot strip M64 segment for reserved PE#%x\n", + phb->ioda.reserved_pe_idx); + + return 0; + +fail: + pr_warn(" Failure %lld %s M64 BAR#%d\n", + rc, desc, phb->ioda.m64_bar_idx); + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_bar_idx, + OPAL_DISABLE_M64); + return -EIO; +} + +static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, + unsigned long *pe_bitmap) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct resource *r; + resource_size_t base, sgsz, start, end; + int segno, i; + + base = phb->ioda.m64_base; + sgsz = phb->ioda.m64_segsize; + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { + r = &pdev->resource[i]; + if (!r->parent || !pnv_pci_is_m64(phb, r)) + continue; + + start = ALIGN_DOWN(r->start - base, sgsz); + end = ALIGN(r->end - base, sgsz); + for (segno = start / sgsz; segno < end / sgsz; segno++) { + if (pe_bitmap) + set_bit(segno, pe_bitmap); + else + pnv_ioda_reserve_pe(phb, segno); + } + } +} + +static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus, + unsigned long *pe_bitmap, + bool all) +{ + struct pci_dev *pdev; + + list_for_each_entry(pdev, &bus->devices, bus_list) { + pnv_ioda_reserve_dev_m64_pe(pdev, pe_bitmap); + + if (all && pdev->subordinate) + pnv_ioda_reserve_m64_pe(pdev->subordinate, + pe_bitmap, all); + } +} + +static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); + struct pnv_ioda_pe *master_pe, *pe; + unsigned long size, *pe_alloc; + int i; + + /* Root bus shouldn't use M64 */ + if (pci_is_root_bus(bus)) + return NULL; + + /* Allocate bitmap */ + size = ALIGN(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); + pe_alloc = kzalloc(size, GFP_KERNEL); + if (!pe_alloc) { + pr_warn("%s: Out of memory !\n", + __func__); + return NULL; + } + + /* Figure out reserved PE numbers by the PE */ + pnv_ioda_reserve_m64_pe(bus, pe_alloc, all); + + /* + * the current bus might not own M64 window and that's all + * contributed by its child buses. For the case, we needn't + * pick M64 dependent PE#. + */ + if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) { + kfree(pe_alloc); + return NULL; + } + + /* + * Figure out the master PE and put all slave PEs to master + * PE's list to form compound PE. + */ + master_pe = NULL; + i = -1; + while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe_num, i + 1)) < + phb->ioda.total_pe_num) { + pe = &phb->ioda.pe_array[i]; + + phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number; + if (!master_pe) { + pe->flags |= PNV_IODA_PE_MASTER; + INIT_LIST_HEAD(&pe->slaves); + master_pe = pe; + } else { + pe->flags |= PNV_IODA_PE_SLAVE; + pe->master = master_pe; + list_add_tail(&pe->list, &master_pe->slaves); + } + } + + kfree(pe_alloc); + return master_pe; +} + +static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) +{ + struct pci_controller *hose = phb->hose; + struct device_node *dn = hose->dn; + struct resource *res; + u32 m64_range[2], i; + const __be32 *r; + u64 pci_addr; + + if (phb->type != PNV_PHB_IODA2) { + pr_info(" Not support M64 window\n"); + return; + } + + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + pr_info(" Firmware too old to support M64 window\n"); + return; + } + + r = of_get_property(dn, "ibm,opal-m64-window", NULL); + if (!r) { + pr_info(" No on %pOF\n", + dn); + return; + } + + /* + * Find the available M64 BAR range and pickup the last one for + * covering the whole 64-bits space. We support only one range. + */ + if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges", + m64_range, 2)) { + /* In absence of the property, assume 0..15 */ + m64_range[0] = 0; + m64_range[1] = 16; + } + /* We only support 64 bits in our allocator */ + if (m64_range[1] > 63) { + pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n", + __func__, m64_range[1], phb->hose->global_number); + m64_range[1] = 63; + } + /* Empty range, no m64 */ + if (m64_range[1] <= m64_range[0]) { + pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n", + __func__, phb->hose->global_number); + return; + } + + /* Configure M64 informations */ + res = &hose->mem_resources[1]; + res->name = dn->full_name; + res->start = of_translate_address(dn, r + 2); + res->end = res->start + of_read_number(r + 4, 2) - 1; + res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + pci_addr = of_read_number(r, 2); + hose->mem_offset[1] = res->start - pci_addr; + + phb->ioda.m64_size = resource_size(res); + phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num; + phb->ioda.m64_base = pci_addr; + + /* This lines up nicely with the display from processing OF ranges */ + pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n", + res->start, res->end, pci_addr, m64_range[0], + m64_range[0] + m64_range[1] - 1); + + /* Mark all M64 used up by default */ + phb->ioda.m64_bar_alloc = (unsigned long)-1; + + /* Use last M64 BAR to cover M64 window */ + m64_range[1]--; + phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1]; + + pr_info(" Using M64 #%d as default window\n", phb->ioda.m64_bar_idx); + + /* Mark remaining ones free */ + for (i = m64_range[0]; i < m64_range[1]; i++) + clear_bit(i, &phb->ioda.m64_bar_alloc); + + /* + * Setup init functions for M64 based on IODA version, IODA3 uses + * the IODA2 code. + */ + phb->init_m64 = pnv_ioda2_init_m64; +} + +static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no]; + struct pnv_ioda_pe *slave; + s64 rc; + + /* Fetch master PE */ + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER))) + return; + + pe_no = pe->pe_number; + } + + /* Freeze master PE */ + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } + + /* Freeze slave PEs */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_SET_FREEZE_ALL); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + slave->pe_number); + } +} + +static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) +{ + struct pnv_ioda_pe *pe, *slave; + s64 rc; + + /* Find master PE */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Clear frozen state for master PE */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, pe_no); + return -EIO; + } + + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Clear frozen state for slave PEs */ + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n", + __func__, rc, opt, phb->hose->global_number, + slave->pe_number); + return -EIO; + } + } + + return 0; +} + +static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) +{ + struct pnv_ioda_pe *slave, *pe; + u8 fstate = 0, state; + __be16 pcierr = 0; + s64 rc; + + /* Sanity check on PE number */ + if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num) + return OPAL_EEH_STOPPED_PERM_UNAVAIL; + + /* + * Fetch the master PE and the PE instance might be + * not initialized yet. + */ + pe = &phb->ioda.pe_array[pe_no]; + if (pe->flags & PNV_IODA_PE_SLAVE) { + pe = pe->master; + WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pe->pe_number; + } + + /* Check the master PE */ + rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &state, &pcierr, NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, pe_no); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* Check the slave PE */ + if (!(pe->flags & PNV_IODA_PE_MASTER)) + return state; + + list_for_each_entry(slave, &pe->slaves, list) { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + slave->pe_number, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting " + "PHB#%x-PE#%x state\n", + __func__, rc, + phb->hose->global_number, slave->pe_number); + return OPAL_EEH_STOPPED_TEMP_UNAVAIL; + } + + /* + * Override the result based on the ascending + * priority. + */ + if (fstate > state) + state = fstate; + } + + return state; +} + +struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn) +{ + int pe_number = phb->ioda.pe_rmap[bdfn]; + + if (pe_number == IODA_INVALID_PE) + return NULL; + + return &phb->ioda.pe_array[pe_number]; +} + +struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); + struct pci_dn *pdn = pci_get_pdn(dev); + + if (!pdn) + return NULL; + if (pdn->pe_number == IODA_INVALID_PE) + return NULL; + return &phb->ioda.pe_array[pdn->pe_number]; +} + +static int pnv_ioda_set_one_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *parent, + struct pnv_ioda_pe *child, + bool is_add) +{ + const char *desc = is_add ? "adding" : "removing"; + uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN : + OPAL_REMOVE_PE_FROM_DOMAIN; + struct pnv_ioda_pe *slave; + long rc; + + /* Parent PE affects child PE */ + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + child->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(child, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + + if (!(child->flags & PNV_IODA_PE_MASTER)) + return 0; + + /* Compound case: parent PE affects slave PEs */ + list_for_each_entry(slave, &child->slaves, list) { + rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number, + slave->pe_number, op); + if (rc != OPAL_SUCCESS) { + pe_warn(slave, "OPAL error %ld %s to parent PELTV\n", + rc, desc); + return -ENXIO; + } + } + + return 0; +} + +static int pnv_ioda_set_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + bool is_add) +{ + struct pnv_ioda_pe *slave; + struct pci_dev *pdev = NULL; + int ret; + + /* + * Clear PE frozen state. If it's master PE, we need + * clear slave PE frozen state as well. + */ + if (is_add) { + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) + opal_pci_eeh_freeze_clear(phb->opal_id, + slave->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add); + if (ret) + return ret; + + /* For compound PEs, any one affects all of them */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry(slave, &pe->slaves, list) { + ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add); + if (ret) + return ret; + } + } + + if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) + pdev = pe->pbus->self; + else if (pe->flags & PNV_IODA_PE_DEV) + pdev = pe->pdev->bus->self; +#ifdef CONFIG_PCI_IOV + else if (pe->flags & PNV_IODA_PE_VF) + pdev = pe->parent_dev; +#endif /* CONFIG_PCI_IOV */ + while (pdev) { + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *parent; + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + parent = &phb->ioda.pe_array[pdn->pe_number]; + ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add); + if (ret) + return ret; + } + + pdev = pdev->bus->self; + } + + return 0; +} + +static void pnv_ioda_unset_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + struct pci_dev *parent) +{ + int64_t rc; + + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, + OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? */ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); +} + +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +{ + struct pci_dev *parent; + uint8_t bcomp, dcomp, fcomp; + int64_t rc; + long rid_end, rid; + + /* Currently, we just deconfigure VF PE. Bus PE will always there.*/ + if (pe->pbus) { + int count; + + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; + parent = pe->pbus->self; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = resource_size(&pe->pbus->busn_res); + else + count = 1; + + switch(count) { + case 1: bcomp = OpalPciBusAll; break; + case 2: bcomp = OpalPciBus7Bits; break; + case 4: bcomp = OpalPciBus6Bits; break; + case 8: bcomp = OpalPciBus5Bits; break; + case 16: bcomp = OpalPciBus4Bits; break; + case 32: bcomp = OpalPciBus3Bits; break; + default: + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); + /* Do an exact match only */ + bcomp = OpalPciBusAll; + } + rid_end = pe->rid + (count << 8); + } else { +#ifdef CONFIG_PCI_IOV + if (pe->flags & PNV_IODA_PE_VF) + parent = pe->parent_dev; + else +#endif + parent = pe->pdev->bus->self; + bcomp = OpalPciBusAll; + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; + rid_end = pe->rid + 1; + } + + /* Clear the reverse map */ + for (rid = pe->rid; rid < rid_end; rid++) + phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; + + /* + * Release from all parents PELT-V. NPUs don't have a PELTV + * table + */ + if (phb->type != PNV_PHB_NPU_OCAPI) + pnv_ioda_unset_peltv(phb, pe, parent); + + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, + bcomp, dcomp, fcomp, OPAL_UNMAP_PE); + if (rc) + pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc); + + pe->pbus = NULL; + pe->pdev = NULL; +#ifdef CONFIG_PCI_IOV + pe->parent_dev = NULL; +#endif + + return 0; +} + +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) +{ + uint8_t bcomp, dcomp, fcomp; + long rc, rid_end, rid; + + /* Bus validation ? */ + if (pe->pbus) { + int count; + + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = resource_size(&pe->pbus->busn_res); + else + count = 1; + + switch(count) { + case 1: bcomp = OpalPciBusAll; break; + case 2: bcomp = OpalPciBus7Bits; break; + case 4: bcomp = OpalPciBus6Bits; break; + case 8: bcomp = OpalPciBus5Bits; break; + case 16: bcomp = OpalPciBus4Bits; break; + case 32: bcomp = OpalPciBus3Bits; break; + default: + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", + count); + /* Do an exact match only */ + bcomp = OpalPciBusAll; + } + rid_end = pe->rid + (count << 8); + } else { + bcomp = OpalPciBusAll; + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; + rid_end = pe->rid + 1; + } + + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, + bcomp, dcomp, fcomp, OPAL_MAP_PE); + if (rc) { + pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + return -ENXIO; + } + + /* + * Configure PELTV. NPUs don't have a PELTV table so skip + * configuration on them. + */ + if (phb->type != PNV_PHB_NPU_OCAPI) + pnv_ioda_set_peltv(phb, pe, true); + + /* Setup reverse map */ + for (rid = pe->rid; rid < rid_end; rid++) + phb->ioda.pe_rmap[rid] = pe->pe_number; + + pe->mve_number = 0; + + return 0; +} + +static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); + struct pci_dn *pdn = pci_get_pdn(dev); + struct pnv_ioda_pe *pe; + + if (!pdn) { + pr_err("%s: Device tree node not associated properly\n", + pci_name(dev)); + return NULL; + } + if (pdn->pe_number != IODA_INVALID_PE) + return NULL; + + pe = pnv_ioda_alloc_pe(phb, 1); + if (!pe) { + pr_warn("%s: Not enough PE# available, disabling device\n", + pci_name(dev)); + return NULL; + } + + /* NOTE: We don't get a reference for the pointer in the PE + * data structure, both the device and PE structures should be + * destroyed at the same time. + * + * At some point we want to remove the PDN completely anyways + */ + pdn->pe_number = pe->pe_number; + pe->flags = PNV_IODA_PE_DEV; + pe->pdev = dev; + pe->pbus = NULL; + pe->mve_number = -1; + pe->rid = dev->bus->number << 8 | pdn->devfn; + pe->device_count++; + + pe_info(pe, "Associated device to PE\n"); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + pnv_ioda_free_pe(pe); + pdn->pe_number = IODA_INVALID_PE; + pe->pdev = NULL; + return NULL; + } + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + return pe; +} + +/* + * There're 2 types of PCI bus sensitive PEs: One that is compromised of + * single PCI bus. Another one that contains the primary PCI bus and its + * subordinate PCI devices and buses. The second type of PE is normally + * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. + */ +static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); + struct pnv_ioda_pe *pe = NULL; + unsigned int pe_num; + + /* + * In partial hotplug case, the PE instance might be still alive. + * We should reuse it instead of allocating a new one. + */ + pe_num = phb->ioda.pe_rmap[bus->number << 8]; + if (WARN_ON(pe_num != IODA_INVALID_PE)) { + pe = &phb->ioda.pe_array[pe_num]; + return NULL; + } + + /* PE number for root bus should have been reserved */ + if (pci_is_root_bus(bus)) + pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; + + /* Check if PE is determined by M64 */ + if (!pe) + pe = pnv_ioda_pick_m64_pe(bus, all); + + /* The PE number isn't pinned by M64 */ + if (!pe) + pe = pnv_ioda_alloc_pe(phb, 1); + + if (!pe) { + pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n", + __func__, pci_domain_nr(bus), bus->number); + return NULL; + } + + pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); + pe->pbus = bus; + pe->pdev = NULL; + pe->mve_number = -1; + pe->rid = bus->busn_res.start << 8; + + if (all) + pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n", + &bus->busn_res.start, &bus->busn_res.end, + pe->pe_number); + else + pe_info(pe, "Secondary bus %pad associated with PE#%x\n", + &bus->busn_res.start, pe->pe_number); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + pnv_ioda_free_pe(pe); + pe->pbus = NULL; + return NULL; + } + + /* Put PE to the list */ + list_add_tail(&pe->list, &phb->ioda.pe_list); + + return pe; +} + +static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + + /* Check if the BDFN for this device is associated with a PE yet */ + pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); + if (!pe) { + /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ + if (WARN_ON(pdev->is_virtfn)) + return; + + pnv_pci_configure_bus(pdev->bus); + pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); + pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff); + + + /* + * If we can't setup the IODA PE something has gone horribly + * wrong and we can't enable DMA for the device. + */ + if (WARN_ON(!pe)) + return; + } else { + pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number); + } + + /* + * We assume that bridges *probably* don't need to do any DMA so we can + * skip allocating a TCE table, etc unless we get a non-bridge device. + */ + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) { + switch (phb->type) { + case PNV_PHB_IODA2: + pnv_pci_ioda2_setup_dma_pe(phb, pe); + break; + default: + pr_warn("%s: No DMA for PHB#%x (type %d)\n", + __func__, phb->hose->global_number, phb->type); + } + } + + if (pdn) + pdn->pe_number = pe->pe_number; + pe->device_count++; + + WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); + pdev->dev.archdata.dma_offset = pe->tce_bypass_base; + set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]); + + /* PEs with a DMA weight of zero won't have a group */ + if (pe->table_group.group) + iommu_add_device(&pe->table_group, &pdev->dev); +} + +/* + * Reconfigure TVE#0 to be usable as 64-bit DMA space. + * + * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses. + * Devices can only access more than that if bit 59 of the PCI address is set + * by hardware, which indicates TVE#1 should be used instead of TVE#0. + * Many PCI devices are not capable of addressing that many bits, and as a + * result are limited to the 4GB of virtual memory made available to 32-bit + * devices in TVE#0. + * + * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit + * devices by configuring the virtual memory past the first 4GB inaccessible + * by 64-bit DMAs. This should only be used by devices that want more than + * 4GB, and only on PEs that have no 32-bit devices. + * + * Currently this will only work on PHB3 (POWER8). + */ +static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe) +{ + u64 window_size, table_size, tce_count, addr; + struct page *table_pages; + u64 tce_order = 28; /* 256MB TCEs */ + __be64 *tces; + s64 rc; + + /* + * Window size needs to be a power of two, but needs to account for + * shifting memory by the 4GB offset required to skip 32bit space. + */ + window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32)); + tce_count = window_size >> tce_order; + table_size = tce_count << 3; + + if (table_size < PAGE_SIZE) + table_size = PAGE_SIZE; + + table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL, + get_order(table_size)); + if (!table_pages) + goto err; + + tces = page_address(table_pages); + if (!tces) + goto err; + + memset(tces, 0, table_size); + + for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) { + tces[(addr + (1ULL << 32)) >> tce_order] = + cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE); + } + + rc = opal_pci_map_pe_dma_window(pe->phb->opal_id, + pe->pe_number, + /* reconfigure window 0 */ + (pe->pe_number << 1) + 0, + 1, + __pa(tces), + table_size, + 1 << tce_order); + if (rc == OPAL_SUCCESS) { + pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n"); + return 0; + } +err: + pe_err(pe, "Error configuring 64-bit DMA bypass\n"); + return -EIO; +} + +static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, + u64 dma_mask) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + + if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) + return false; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + if (pe->tce_bypass_enabled) { + u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1; + if (dma_mask >= top) + return true; + } + + /* + * If the device can't set the TCE bypass bit but still wants + * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to + * bypass the 32-bit region and be usable for 64-bit DMAs. + * The device needs to be able to address all of this space. + */ + if (dma_mask >> 32 && + dma_mask > (memory_hotplug_max() + (1ULL << 32)) && + /* pe->pdev should be set if it's a single device, pe->pbus if not */ + (pe->device_count == 1 || !pe->pbus) && + phb->model == PNV_PHB_MODEL_PHB3) { + /* Configure the bypass mode */ + s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe); + if (rc) + return false; + /* 4GB offset bypasses 32-bit space */ + pdev->dev.archdata.dma_offset = (1ULL << 32); + return true; + } + + return false; +} + +static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb) +{ + return phb->regs + 0x210; +} + +#ifdef CONFIG_IOMMU_API +/* Common for IODA1 and IODA2 */ +static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction) +{ + return pnv_tce_xchg(tbl, index, hpa, direction); +} +#endif + +#define PHB3_TCE_KILL_INVAL_ALL PPC_BIT(0) +#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1) +#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2) + +static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe) +{ + /* 01xb - invalidate TCEs that match the specified PE# */ + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb); + unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); + + mb(); /* Ensure above stores are visible */ + __raw_writeq_be(val, invalidate); +} + +static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, + unsigned shift, unsigned long index, + unsigned long npages) +{ + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb); + unsigned long start, end, inc; + + /* We'll invalidate DMA address in PE scope */ + start = PHB3_TCE_KILL_INVAL_ONE; + start |= (pe->pe_number & 0xFF); + end = start; + + /* Figure out the start, end and step */ + start |= (index << shift); + end |= ((index + npages - 1) << shift); + inc = (0x1ull << shift); + mb(); + + while (start <= end) { + __raw_writeq_be(start, invalidate); + start += inc; + } +} + +static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + + if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) + pnv_pci_phb3_tce_invalidate_pe(pe); + else + opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL_PE, + pe->pe_number, 0, 0, 0); +} + +static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, + unsigned long index, unsigned long npages) +{ + struct iommu_table_group_link *tgl; + + list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) { + struct pnv_ioda_pe *pe = container_of(tgl->table_group, + struct pnv_ioda_pe, table_group); + struct pnv_phb *phb = pe->phb; + unsigned int shift = tbl->it_page_shift; + + if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) + pnv_pci_phb3_tce_invalidate(pe, shift, + index, npages); + else + opal_pci_tce_kill(phb->opal_id, + OPAL_PCI_TCE_KILL_PAGES, + pe->pe_number, 1u << shift, + index << shift, npages); + } +} + +static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + unsigned long attrs) +{ + int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, + attrs); + + if (!ret) + pnv_pci_ioda2_tce_invalidate(tbl, index, npages); + + return ret; +} + +static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, + long npages) +{ + pnv_tce_free(tbl, index, npages); + + pnv_pci_ioda2_tce_invalidate(tbl, index, npages); +} + +static struct iommu_table_ops pnv_ioda2_iommu_ops = { + .set = pnv_ioda2_tce_build, +#ifdef CONFIG_IOMMU_API + .xchg_no_kill = pnv_ioda_tce_xchg_no_kill, + .tce_kill = pnv_pci_ioda2_tce_invalidate, + .useraddrptr = pnv_tce_useraddrptr, +#endif + .clear = pnv_ioda2_tce_free, + .get = pnv_tce_get, + .free = pnv_pci_ioda2_table_free_pages, +}; + +static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + int64_t rc; + const unsigned long size = tbl->it_indirect_levels ? + tbl->it_level_size : tbl->it_size; + const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; + const __u64 win_size = tbl->it_size << tbl->it_page_shift; + + pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n", + num, start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); + + /* + * Map TCE table through TVT. The TVE index is the PE number + * shifted by 1 bit for 32-bits DMA space. + */ + rc = opal_pci_map_pe_dma_window(phb->opal_id, + pe->pe_number, + (pe->pe_number << 1) + num, + tbl->it_indirect_levels + 1, + __pa(tbl->it_base), + size << 3, + IOMMU_PAGE_SIZE(tbl)); + if (rc) { + pe_err(pe, "Failed to configure TCE table, err %lld\n", rc); + return rc; + } + + pnv_pci_link_table_and_group(phb->hose->node, num, + tbl, &pe->table_group); + pnv_pci_ioda2_tce_invalidate_pe(pe); + + return 0; +} + +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) +{ + uint16_t window_id = (pe->pe_number << 1 ) + 1; + int64_t rc; + + pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis"); + if (enable) { + phys_addr_t top = memblock_end_of_DRAM(); + + top = roundup_pow_of_two(top); + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + window_id, + pe->tce_bypass_base, + top); + } else { + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, + pe->pe_number, + window_id, + pe->tce_bypass_base, + 0); + } + if (rc) + pe_err(pe, "OPAL error %lld configuring bypass window\n", rc); + else + pe->tce_bypass_enabled = enable; +} + +static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + bool alloc_userspace_copy, struct iommu_table **ptbl) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + int nid = pe->phb->hose->node; + __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start; + long ret; + struct iommu_table *tbl; + + tbl = pnv_pci_table_alloc(nid); + if (!tbl) + return -ENOMEM; + + tbl->it_ops = &pnv_ioda2_iommu_ops; + + ret = pnv_pci_ioda2_table_alloc_pages(nid, + bus_offset, page_shift, window_size, + levels, alloc_userspace_copy, tbl); + if (ret) { + iommu_tce_table_put(tbl); + return ret; + } + + *ptbl = tbl; + + return 0; +} + +static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = NULL; + long rc; + unsigned long res_start, res_end; + + /* + * crashkernel= specifies the kdump kernel's maximum memory at + * some offset and there is no guaranteed the result is a power + * of 2, which will cause errors later. + */ + const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max()); + + /* + * In memory constrained environments, e.g. kdump kernel, the + * DMA window can be larger than available memory, which will + * cause errors later. + */ + const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER); + + /* + * We create the default window as big as we can. The constraint is + * the max order of allocation possible. The TCE table is likely to + * end up being multilevel and with on-demand allocation in place, + * the initial use is not going to be huge as the default window aims + * to support crippled devices (i.e. not fully 64bit DMAble) only. + */ + /* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */ + const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory); + /* Each TCE level cannot exceed maxblock so go multilevel if needed */ + unsigned long tces_order = ilog2(window_size >> PAGE_SHIFT); + unsigned long tcelevel_order = ilog2(maxblock >> 3); + unsigned int levels = tces_order / tcelevel_order; + + if (tces_order % tcelevel_order) + levels += 1; + /* + * We try to stick to default levels (which is >1 at the moment) in + * order to save memory by relying on on-demain TCE level allocation. + */ + levels = max_t(unsigned int, levels, POWERNV_IOMMU_DEFAULT_LEVELS); + + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, PAGE_SHIFT, + window_size, levels, false, &tbl); + if (rc) { + pe_err(pe, "Failed to create 32-bit TCE table, err %ld", + rc); + return rc; + } + + /* We use top part of 32bit space for MMIO so exclude it from DMA */ + res_start = 0; + res_end = 0; + if (window_size > pe->phb->ioda.m32_pci_base) { + res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift; + res_end = min(window_size, SZ_4G) >> tbl->it_page_shift; + } + + tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number; + if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end)) + rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl); + else + rc = -ENOMEM; + if (rc) { + pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc); + iommu_tce_table_put(tbl); + tbl = NULL; /* This clears iommu_table_base below */ + } + if (!pnv_iommu_bypass_disabled) + pnv_pci_ioda2_set_bypass(pe, true); + + /* + * Set table base for the case of IOMMU DMA use. Usually this is done + * from dma_dev_setup() which is not called when a device is returned + * from VFIO so do it here. + */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + + return 0; +} + +static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + struct pnv_phb *phb = pe->phb; + long ret; + + pe_info(pe, "Removing DMA window #%d\n", num); + + ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + (pe->pe_number << 1) + num, + 0/* levels */, 0/* table address */, + 0/* table size */, 0/* page size */); + if (ret) + pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); + else + pnv_pci_ioda2_tce_invalidate_pe(pe); + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + + return ret; +} + +#ifdef CONFIG_IOMMU_API +unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels) +{ + unsigned long bytes = 0; + const unsigned window_shift = ilog2(window_size); + unsigned entries_shift = window_shift - page_shift; + unsigned table_shift = entries_shift + 3; + unsigned long tce_table_size = max(0x1000UL, 1UL << table_shift); + unsigned long direct_table_size; + + if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || + !is_power_of_2(window_size)) + return 0; + + /* Calculate a direct table size from window_size and levels */ + entries_shift = (entries_shift + levels - 1) / levels; + table_shift = entries_shift + 3; + table_shift = max_t(unsigned, table_shift, PAGE_SHIFT); + direct_table_size = 1UL << table_shift; + + for ( ; levels; --levels) { + bytes += ALIGN(tce_table_size, direct_table_size); + + tce_table_size /= direct_table_size; + tce_table_size <<= 3; + tce_table_size = max_t(unsigned long, + tce_table_size, direct_table_size); + } + + return bytes + bytes; /* one for HW table, one for userspace copy */ +} + +static long pnv_pci_ioda2_create_table_userspace( + struct iommu_table_group *table_group, + int num, __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + long ret = pnv_pci_ioda2_create_table(table_group, + num, page_shift, window_size, levels, true, ptbl); + + if (!ret) + (*ptbl)->it_allocated_size = pnv_pci_ioda2_get_table_size( + page_shift, window_size, levels); + return ret; +} + +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); + dev->dev.archdata.dma_offset = pe->tce_bypass_base; + + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + +static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */ + struct iommu_table *tbl = pe->table_group.tables[0]; + + /* + * iommu_ops transfers the ownership per a device and we mode + * the group ownership with the first device in the group. + */ + if (!tbl) + return 0; + + pnv_pci_ioda2_set_bypass(pe, false); + pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus); + else if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, NULL); + iommu_tce_table_put(tbl); + + return 0; +} + +static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) +{ + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, + table_group); + + /* See the comment about iommu_ops above */ + if (pe->table_group.tables[0]) + return; + pnv_pci_ioda2_setup_default_config(pe); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus); +} + +static struct iommu_table_group_ops pnv_pci_ioda2_ops = { + .get_table_size = pnv_pci_ioda2_get_table_size, + .create_table = pnv_pci_ioda2_create_table_userspace, + .set_window = pnv_pci_ioda2_set_window, + .unset_window = pnv_pci_ioda2_unset_window, + .take_ownership = pnv_ioda2_take_ownership, + .release_ownership = pnv_ioda2_release_ownership, +}; +#endif + +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, + struct pnv_ioda_pe *pe) +{ + int64_t rc; + + /* TVE #1 is selected by PCI address bit 59 */ + pe->tce_bypass_base = 1ull << 59; + + /* The PE will reserve all possible 32-bits space */ + pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n", + phb->ioda.m32_pci_base); + + /* Setup linux iommu table */ + pe->table_group.tce32_start = 0; + pe->table_group.tce32_size = phb->ioda.m32_pci_base; + pe->table_group.max_dynamic_windows_supported = + IOMMU_TABLE_GROUP_MAX_TABLES; + pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS; + pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb); + + rc = pnv_pci_ioda2_setup_default_config(pe); + if (rc) + return; + +#ifdef CONFIG_IOMMU_API + pe->table_group.ops = &pnv_pci_ioda2_ops; + iommu_register_group(&pe->table_group, phb->hose->global_number, + pe->pe_number); +#endif + pe->dma_setup_done = true; +} + +/* + * Called from KVM in real mode to EOI passthru interrupts. The ICP + * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru(). + * + * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call + * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ + * numbers of the in-the-middle MSI domain are vector numbers and it's + * good enough for OPAL. Use that. + */ +int64_t pnv_opal_pci_msi_eoi(struct irq_data *d) +{ + struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data); + struct pnv_phb *phb = hose->private_data; + + return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq); +} + +/* + * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers + */ +static void pnv_ioda2_msi_eoi(struct irq_data *d) +{ + int64_t rc; + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + + rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); + WARN_ON_ONCE(rc); + + icp_native_eoi(d); +} + +/* P8/CXL only */ +void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +{ + struct irq_data *idata; + struct irq_chip *ichip; + + /* The MSI EOI OPAL call is only needed on PHB3 */ + if (phb->model != PNV_PHB_MODEL_PHB3) + return; + + if (!phb->ioda.irq_chip_init) { + /* + * First time we setup an MSI IRQ, we need to setup the + * corresponding IRQ chip to route correctly. + */ + idata = irq_get_irq_data(virq); + ichip = irq_data_get_irq_chip(idata); + phb->ioda.irq_chip_init = 1; + phb->ioda.irq_chip = *ichip; + phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi; + } + irq_set_chip(virq, &phb->ioda.irq_chip); + irq_set_chip_data(virq, phb->hose); +} + +static struct irq_chip pnv_pci_msi_irq_chip; + +/* + * Returns true iff chip is something that we could call + * pnv_opal_pci_msi_eoi for. + */ +bool is_pnv_opal_msi(struct irq_chip *chip) +{ + return chip == &pnv_pci_msi_irq_chip; +} +EXPORT_SYMBOL_GPL(is_pnv_opal_msi); + +static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, + unsigned int xive_num, + unsigned int is_64, struct msi_msg *msg) +{ + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); + __be32 data; + int rc; + + dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__, + is_64 ? "64" : "32", xive_num); + + /* No PE assigned ? bail out ... no MSI for you ! */ + if (pe == NULL) + return -ENXIO; + + /* Check if we have an MVE */ + if (pe->mve_number < 0) + return -ENXIO; + + /* Force 32-bit MSI on some broken devices */ + if (dev->no_64bit_msi) + is_64 = 0; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pr_warn("%s: OPAL error %d setting XIVE %d PE\n", + pci_name(dev), rc, xive_num); + return -EIO; + } + + if (is_64) { + __be64 addr64; + + rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, + &addr64, &data); + if (rc) { + pr_warn("%s: OPAL error %d getting 64-bit MSI data\n", + pci_name(dev), rc); + return -EIO; + } + msg->address_hi = be64_to_cpu(addr64) >> 32; + msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; + } else { + __be32 addr32; + + rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, + &addr32, &data); + if (rc) { + pr_warn("%s: OPAL error %d getting 32-bit MSI data\n", + pci_name(dev), rc); + return -EIO; + } + msg->address_hi = 0; + msg->address_lo = be32_to_cpu(addr32); + } + msg->data = be32_to_cpu(data); + + return 0; +} + +/* + * The msi_free() op is called before irq_domain_free_irqs_top() when + * the handler data is still available. Use that to clear the XIVE + * controller. + */ +static void pnv_msi_ops_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int irq) +{ + if (xive_enabled()) + xive_irq_free_data(irq); +} + +static struct msi_domain_ops pnv_pci_msi_domain_ops = { + .msi_free = pnv_msi_ops_msi_free, +}; + +static void pnv_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pnv_msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void pnv_msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip pnv_pci_msi_irq_chip = { + .name = "PNV-PCI-MSI", + .irq_shutdown = pnv_msi_shutdown, + .irq_mask = pnv_msi_mask, + .irq_unmask = pnv_msi_unmask, + .irq_eoi = irq_chip_eoi_parent, +}; + +static struct msi_domain_info pnv_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .ops = &pnv_pci_msi_domain_ops, + .chip = &pnv_pci_msi_irq_chip, +}; + +static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_data_get_msi_desc(d); + struct pci_dev *pdev = msi_desc_to_pci_dev(entry); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + int rc; + + rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq, + entry->pci.msi_attrib.is_64, msg); + if (rc) + dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n", + entry->pci.msi_attrib.is_64 ? "64" : "32", d->hwirq, rc); +} + +/* + * The IRQ data is mapped in the MSI domain in which HW IRQ numbers + * correspond to vector numbers. + */ +static void pnv_msi_eoi(struct irq_data *d) +{ + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + + if (phb->model == PNV_PHB_MODEL_PHB3) { + /* + * The EOI OPAL call takes an OPAL HW IRQ number but + * since it is translated into a vector number in + * OPAL, use that directly. + */ + WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq)); + } + + irq_chip_eoi_parent(d); +} + +static struct irq_chip pnv_msi_irq_chip = { + .name = "PNV-MSI", + .irq_shutdown = pnv_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = pnv_msi_eoi, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pnv_msi_compose_msg, +}; + +static int pnv_irq_parent_domain_alloc(struct irq_domain *domain, + unsigned int virq, int hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *hose = domain->host_data; + struct pnv_phb *phb = hose->private_data; + msi_alloc_info_t *info = arg; + struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc); + int hwirq; + int i, ret; + + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs); + if (hwirq < 0) { + dev_warn(&pdev->dev, "failed to find a free MSI\n"); + return -ENOSPC; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + hose->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pnv_irq_parent_domain_alloc(domain, virq + i, + phb->msi_base + hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pnv_msi_irq_chip, hose); + } + + return 0; + +out: + irq_domain_free_irqs_parent(domain, virq, i - 1); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs); + return ret; +} + +static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *hose = irq_data_get_irq_chip_data(d); + struct pnv_phb *phb = hose->private_data; + + pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn, + virq, d->hwirq, nr_irqs); + + msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs); + /* XIVE domain is cleared through ->msi_free() */ +} + +static const struct irq_domain_ops pnv_irq_domain_ops = { + .alloc = pnv_irq_domain_alloc, + .free = pnv_irq_domain_free, +}; + +static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) +{ + struct pnv_phb *phb = hose->private_data; + struct irq_domain *parent = irq_get_default_host(); + + hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id); + if (!hose->fwnode) + return -ENOMEM; + + hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count, + hose->fwnode, + &pnv_irq_domain_ops, hose); + if (!hose->dev_domain) { + pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); + irq_domain_free_fwnode(hose->fwnode); + return -ENOMEM; + } + + hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn), + &pnv_msi_domain_info, + hose->dev_domain); + if (!hose->msi_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + hose->dn, hose->global_number); + irq_domain_free_fwnode(hose->fwnode); + irq_domain_remove(hose->dev_domain); + return -ENOMEM; + } + + return 0; +} + +static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb) +{ + unsigned int count; + const __be32 *prop = of_get_property(phb->hose->dn, + "ibm,opal-msi-ranges", NULL); + if (!prop) { + /* BML Fallback */ + prop = of_get_property(phb->hose->dn, "msi-ranges", NULL); + } + if (!prop) + return; + + phb->msi_base = be32_to_cpup(prop); + count = be32_to_cpup(prop + 1); + if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { + pr_err("PCI %d: Failed to allocate MSI bitmap !\n", + phb->hose->global_number); + return; + } + + pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", + count, phb->msi_base); + + pnv_msi_allocate_domains(phb->hose, count); +} + +static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, + struct resource *res) +{ + struct pnv_phb *phb = pe->phb; + struct pci_bus_region region; + int index; + int64_t rc; + + if (!res || !res->flags || res->start > res->end || + res->flags & IORESOURCE_UNSET) + return; + + if (res->flags & IORESOURCE_IO) { + region.start = res->start - phb->ioda.io_pci_base; + region.end = res->end - phb->ioda.io_pci_base; + index = region.start / phb->ioda.io_segsize; + + while (index < phb->ioda.total_pe_num && + region.start <= region.end) { + phb->ioda.io_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.io_segsize; + index++; + } + } else if ((res->flags & IORESOURCE_MEM) && + !pnv_pci_is_m64(phb, res)) { + region.start = res->start - + phb->hose->mem_offset[0] - + phb->ioda.m32_pci_base; + region.end = res->end - + phb->hose->mem_offset[0] - + phb->ioda.m32_pci_base; + index = region.start / phb->ioda.m32_segsize; + + while (index < phb->ioda.total_pe_num && + region.start <= region.end) { + phb->ioda.m32_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.m32_segsize; + index++; + } + } +} + +/* + * This function is supposed to be called on basis of PE from top + * to bottom style. So the I/O or MMIO segment assigned to + * parent PE could be overridden by its child PEs if necessary. + */ +static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) +{ + struct pci_dev *pdev; + int i; + + /* + * NOTE: We only care PCI bus based PE for now. For PCI + * device based PE, for example SRIOV sensitive VF should + * be figured out later. + */ + BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); + + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) + pnv_ioda_setup_pe_res(pe, &pdev->resource[i]); + + /* + * If the PE contains all subordinate PCI buses, the + * windows of the child bridges should be mapped to + * the PE as well. + */ + if (!(pe->flags & PNV_IODA_PE_BUS_ALL) || !pci_is_bridge(pdev)) + continue; + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) + pnv_ioda_setup_pe_res(pe, + &pdev->resource[PCI_BRIDGE_RESOURCES + i]); + } +} + +#ifdef CONFIG_DEBUG_FS +static int pnv_pci_diag_data_set(void *data, u64 val) +{ + struct pnv_phb *phb = data; + s64 ret; + + /* Retrieve the diag data from firmware */ + ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, + phb->diag_data_size); + if (ret != OPAL_SUCCESS) + return -EIO; + + /* Print the diag data to the kernel log */ + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set, + "%llu\n"); + +static int pnv_pci_ioda_pe_dump(void *data, u64 val) +{ + struct pnv_phb *phb = data; + int pe_num; + + for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { + struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_num]; + + if (!test_bit(pe_num, phb->ioda.pe_alloc)) + continue; + + pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n", + pe->rid, pe->device_count, + (pe->flags & PNV_IODA_PE_DEV) ? "dev " : "", + (pe->flags & PNV_IODA_PE_BUS) ? "bus " : "", + (pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "", + (pe->flags & PNV_IODA_PE_MASTER) ? "master " : "", + (pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "", + (pe->flags & PNV_IODA_PE_VF) ? "vf " : ""); + } + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL, + pnv_pci_ioda_pe_dump, "%llu\n"); + +#endif /* CONFIG_DEBUG_FS */ + +static void pnv_pci_ioda_create_dbgfs(void) +{ +#ifdef CONFIG_DEBUG_FS + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + char name[16]; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + sprintf(name, "PCI%04x", hose->global_number); + phb->dbgfs = debugfs_create_dir(name, arch_debugfs_dir); + + debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs, + phb, &pnv_pci_diag_data_fops); + debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs, + phb, &pnv_pci_ioda_pe_dump_fops); + } +#endif /* CONFIG_DEBUG_FS */ +} + +static void pnv_pci_enable_bridge(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + struct pci_bus *child; + + /* Empty bus ? bail */ + if (list_empty(&bus->devices)) + return; + + /* + * If there's a bridge associated with that bus enable it. This works + * around races in the generic code if the enabling is done during + * parallel probing. This can be removed once those races have been + * fixed. + */ + if (dev) { + int rc = pci_enable_device(dev); + if (rc) + pci_err(dev, "Error enabling bridge (%d)\n", rc); + pci_set_master(dev); + } + + /* Perform the same to child busses */ + list_for_each_entry(child, &bus->children, node) + pnv_pci_enable_bridge(child); +} + +static void pnv_pci_enable_bridges(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) + pnv_pci_enable_bridge(hose->bus); +} + +static void pnv_pci_ioda_fixup(void) +{ + pnv_pci_ioda_create_dbgfs(); + + pnv_pci_enable_bridges(); + +#ifdef CONFIG_EEH + pnv_eeh_post_init(); +#endif +} + +/* + * Returns the alignment for I/O or memory windows for P2P + * bridges. That actually depends on how PEs are segmented. + * For now, we return I/O or M32 segment size for PE sensitive + * P2P bridges. Otherwise, the default values (4KiB for I/O, + * 1MiB for memory) will be returned. + * + * The current PCI bus might be put into one PE, which was + * create against the parent PCI bridge. For that case, we + * needn't enlarge the alignment so that we can save some + * resources. + */ +static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); + int num_pci_bridges = 0; + struct pci_dev *bridge; + + bridge = bus->self; + while (bridge) { + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) { + num_pci_bridges++; + if (num_pci_bridges >= 2) + return 1; + } + + bridge = bridge->bus->self; + } + + /* + * We fall back to M32 if M64 isn't supported. We enforce the M64 + * alignment for any 64-bit resource, PCIe doesn't care and + * bridges only do 64-bit prefetchable anyway. + */ + if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) + return phb->ioda.m64_segsize; + if (type & IORESOURCE_MEM) + return phb->ioda.m32_segsize; + + return phb->ioda.io_segsize; +} + +/* + * We are updating root port or the upstream port of the + * bridge behind the root port with PHB's windows in order + * to accommodate the changes on required resources during + * PCI (slot) hotplug, which is connected to either root + * port or the downstream ports of PCIe switch behind the + * root port. + */ +static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, + unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dev *bridge = bus->self; + struct resource *r, *w; + bool msi_region = false; + int i; + + /* Check if we need apply fixup to the bridge's windows */ + if (!pci_is_root_bus(bridge->bus) && + !pci_is_root_bus(bridge->bus->self->bus)) + return; + + /* Fixup the resources */ + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &bridge->resource[PCI_BRIDGE_RESOURCES + i]; + if (!r->flags || !r->parent) + continue; + + w = NULL; + if (r->flags & type & IORESOURCE_IO) + w = &hose->io_resource; + else if (pnv_pci_is_m64(phb, r) && + (type & IORESOURCE_PREFETCH) && + phb->ioda.m64_segsize) + w = &hose->mem_resources[1]; + else if (r->flags & type & IORESOURCE_MEM) { + w = &hose->mem_resources[0]; + msi_region = true; + } + + r->start = w->start; + r->end = w->end; + + /* The 64KB 32-bits MSI region shouldn't be included in + * the 32-bits bridge window. Otherwise, we can see strange + * issues. One of them is EEH error observed on Garrison. + * + * Exclude top 1MB region which is the minimal alignment of + * 32-bits bridge window. + */ + if (msi_region) { + r->end += 0x10000; + r->end -= 0x100000; + } + } +} + +static void pnv_pci_configure_bus(struct pci_bus *bus) +{ + struct pci_dev *bridge = bus->self; + struct pnv_ioda_pe *pe; + bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); + + dev_info(&bus->dev, "Configuring PE for bus\n"); + + /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ + if (WARN_ON(list_empty(&bus->devices))) + return; + + /* Reserve PEs according to used M64 resources */ + pnv_ioda_reserve_m64_pe(bus, NULL, all); + + /* + * Assign PE. We might run here because of partial hotplug. + * For the case, we just pick up the existing PE and should + * not allocate resources again. + */ + pe = pnv_ioda_setup_bus_PE(bus, all); + if (!pe) + return; + + pnv_ioda_setup_pe_seg(pe); +} + +static resource_size_t pnv_pci_default_alignment(void) +{ + return PAGE_SIZE; +} + +/* Prevent enabling devices for which we couldn't properly + * assign a PE + */ +static bool pnv_pci_enable_device_hook(struct pci_dev *dev) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(dev); + if (!pdn || pdn->pe_number == IODA_INVALID_PE) { + pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n"); + return false; + } + + return true; +} + +static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) +{ + struct pci_dn *pdn; + struct pnv_ioda_pe *pe; + + pdn = pci_get_pdn(dev); + if (!pdn) + return false; + + if (pdn->pe_number == IODA_INVALID_PE) { + pe = pnv_ioda_setup_dev_PE(dev); + if (!pe) + return false; + } + return true; +} + +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = pe->table_group.tables[0]; + int64_t rc; + + if (!pe->dma_setup_done) + return; + + rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (rc) + pe_warn(pe, "OPAL error %lld release DMA window\n", rc); + + pnv_pci_ioda2_set_bypass(pe, false); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + WARN_ON(pe->table_group.group); + } + + iommu_tce_table_put(tbl); +} + +static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, + unsigned short win, + unsigned int *map) +{ + struct pnv_phb *phb = pe->phb; + int idx; + int64_t rc; + + for (idx = 0; idx < phb->ioda.total_pe_num; idx++) { + if (map[idx] != pe->pe_number) + continue; + + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, 0, idx); + + if (rc != OPAL_SUCCESS) + pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", + rc, win, idx); + + map[idx] = IODA_INVALID_PE; + } +} + +static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + + if (phb->type == PNV_PHB_IODA2) { + pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, + phb->ioda.m32_segmap); + } +} + +static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + struct pnv_ioda_pe *slave, *tmp; + + pe_info(pe, "Releasing PE\n"); + + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + switch (phb->type) { + case PNV_PHB_IODA2: + pnv_pci_ioda2_release_pe_dma(pe); + break; + case PNV_PHB_NPU_OCAPI: + break; + default: + WARN_ON(1); + } + + pnv_ioda_release_pe_seg(pe); + pnv_ioda_deconfigure_pe(pe->phb, pe); + + /* Release slave PEs in the compound PE */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry_safe(slave, tmp, &pe->slaves, list) { + list_del(&slave->list); + pnv_ioda_free_pe(slave); + } + } + + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_idx == pe->pe_number) + return; + + pnv_ioda_free_pe(pe); +} + +static void pnv_pci_release_device(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + + /* The VF PE state is torn down when sriov_disable() is called */ + if (pdev->is_virtfn) + return; + + if (!pdn || pdn->pe_number == IODA_INVALID_PE) + return; + +#ifdef CONFIG_PCI_IOV + /* + * FIXME: Try move this to sriov_disable(). It's here since we allocate + * the iov state at probe time since we need to fiddle with the IOV + * resources. + */ + if (pdev->is_physfn) + kfree(pdev->dev.archdata.iov_data); +#endif + + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one. Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ + pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + + WARN_ON(--pe->device_count < 0); + if (pe->device_count == 0) + pnv_ioda_release_pe(pe); +} + +static void pnv_pci_ioda_shutdown(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + + opal_pci_reset(phb->opal_id, OPAL_RESET_PCI_IODA_TABLE, + OPAL_ASSERT_RESET); +} + +static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(bus); + struct pnv_ioda_pe *pe; + + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))) + continue; + + if (!pe->pbus) + continue; + + if (bus->number == ((pe->rid >> 8) & 0xFF)) { + pe->pbus = bus; + break; + } + } +} + +#ifdef CONFIG_IOMMU_API +static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose, + struct pci_dev *pdev) +{ + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + + if (WARN_ON(!phb)) + return ERR_PTR(-ENODEV); + + pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); + if (!pe) + return ERR_PTR(-ENODEV); + + if (!pe->table_group.group) + return ERR_PTR(-ENODEV); + + return iommu_group_ref_get(pe->table_group.group); +} +#endif + +static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, + .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, + .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported, + .enable_device_hook = pnv_pci_enable_device_hook, + .release_device = pnv_pci_release_device, + .window_alignment = pnv_pci_window_alignment, + .setup_bridge = pnv_pci_fixup_bridge_resources, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .shutdown = pnv_pci_ioda_shutdown, +#ifdef CONFIG_IOMMU_API + .device_group = pnv_pci_device_group, +#endif +}; + +static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { + .enable_device_hook = pnv_ocapi_enable_device_hook, + .release_device = pnv_pci_release_device, + .window_alignment = pnv_pci_window_alignment, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .shutdown = pnv_pci_ioda_shutdown, +}; + +static void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + unsigned long size, m64map_off, m32map_off, pemap_off; + struct pnv_ioda_pe *root_pe; + struct resource r; + const __be64 *prop64; + const __be32 *prop32; + int len; + unsigned int segno; + u64 phb_id; + void *aux; + long rc; + + if (!of_device_is_available(np)) + return; + + pr_info("Initializing %s PHB (%pOF)\n", pnv_phb_names[ioda_type], np); + + prop64 = of_get_property(np, "ibm,opal-phbid", NULL); + if (!prop64) { + pr_err(" Missing \"ibm,opal-phbid\" property !\n"); + return; + } + phb_id = be64_to_cpup(prop64); + pr_debug(" PHB-ID : 0x%016llx\n", phb_id); + + phb = kzalloc(sizeof(*phb), GFP_KERNEL); + if (!phb) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(*phb)); + + /* Allocate PCI controller */ + phb->hose = hose = pcibios_alloc_controller(np); + if (!phb->hose) { + pr_err(" Can't allocate PCI controller for %pOF\n", + np); + memblock_free(phb, sizeof(struct pnv_phb)); + return; + } + + spin_lock_init(&phb->lock); + prop32 = of_get_property(np, "bus-range", &len); + if (prop32 && len == 8) { + hose->first_busno = be32_to_cpu(prop32[0]); + hose->last_busno = be32_to_cpu(prop32[1]); + } else { + pr_warn(" Broken on %pOF\n", np); + hose->first_busno = 0; + hose->last_busno = 0xff; + } + hose->private_data = phb; + phb->hub_id = hub_id; + phb->opal_id = phb_id; + phb->type = ioda_type; + mutex_init(&phb->ioda.pe_alloc_mutex); + + /* Detect specific models for error handling */ + if (of_device_is_compatible(np, "ibm,p7ioc-pciex")) + phb->model = PNV_PHB_MODEL_P7IOC; + else if (of_device_is_compatible(np, "ibm,power8-pciex")) + phb->model = PNV_PHB_MODEL_PHB3; + else + phb->model = PNV_PHB_MODEL_UNKNOWN; + + /* Initialize diagnostic data buffer */ + prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL); + if (prop32) + phb->diag_data_size = be32_to_cpup(prop32); + else + phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; + + phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL); + if (!phb->diag_data) + panic("%s: Failed to allocate %u bytes\n", __func__, + phb->diag_data_size); + + /* Parse 32-bit and IO ranges (if any) */ + pci_process_bridge_OF_ranges(hose, np, !hose->global_number); + + /* Get registers */ + if (!of_address_to_resource(np, 0, &r)) { + phb->regs_phys = r.start; + phb->regs = ioremap(r.start, resource_size(&r)); + if (phb->regs == NULL) + pr_err(" Failed to map registers !\n"); + } + + /* Initialize more IODA stuff */ + phb->ioda.total_pe_num = 1; + prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); + if (prop32) + phb->ioda.total_pe_num = be32_to_cpup(prop32); + prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); + if (prop32) + phb->ioda.reserved_pe_idx = be32_to_cpup(prop32); + + /* Invalidate RID to PE# mapping */ + for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++) + phb->ioda.pe_rmap[segno] = IODA_INVALID_PE; + + /* Parse 64-bit MMIO range */ + pnv_ioda_parse_m64_window(phb); + + phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); + /* FW Has already off top 64k of M32 space (MSI space) */ + phb->ioda.m32_size += 0x10000; + + phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num; + phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0]; + phb->ioda.io_size = hose->pci_io_size; + phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num; + phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ + + /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ + size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8, + sizeof(unsigned long)); + m64map_off = size; + size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]); + m32map_off = size; + size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]); + pemap_off = size; + size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); + aux = kzalloc(size, GFP_KERNEL); + if (!aux) + panic("%s: Failed to allocate %lu bytes\n", __func__, size); + + phb->ioda.pe_alloc = aux; + phb->ioda.m64_segmap = aux + m64map_off; + phb->ioda.m32_segmap = aux + m32map_off; + for (segno = 0; segno < phb->ioda.total_pe_num; segno++) { + phb->ioda.m64_segmap[segno] = IODA_INVALID_PE; + phb->ioda.m32_segmap[segno] = IODA_INVALID_PE; + } + phb->ioda.pe_array = aux + pemap_off; + + /* + * Choose PE number for root bus, which shouldn't have + * M64 resources consumed by its child devices. To pick + * the PE number adjacent to the reserved one if possible. + */ + pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx); + if (phb->ioda.reserved_pe_idx == 0) { + phb->ioda.root_pe_idx = 1; + pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); + } else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) { + phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; + pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); + } else { + /* otherwise just allocate one */ + root_pe = pnv_ioda_alloc_pe(phb, 1); + phb->ioda.root_pe_idx = root_pe->pe_number; + } + + INIT_LIST_HEAD(&phb->ioda.pe_list); + mutex_init(&phb->ioda.pe_list_mutex); + +#if 0 /* We should really do that ... */ + rc = opal_pci_set_phb_mem_window(opal->phb_id, + window_type, + window_num, + starting_real_address, + starting_pci_address, + segment_size); +#endif + + pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n", + phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx, + phb->ioda.m32_size, phb->ioda.m32_segsize); + if (phb->ioda.m64_size) + pr_info(" M64: 0x%lx [segment=0x%lx]\n", + phb->ioda.m64_size, phb->ioda.m64_segsize); + if (phb->ioda.io_size) + pr_info(" IO: 0x%x [segment=0x%x]\n", + phb->ioda.io_size, phb->ioda.io_segsize); + + + phb->hose->ops = &pnv_pci_ops; + phb->get_pe_state = pnv_ioda_get_pe_state; + phb->freeze_pe = pnv_ioda_freeze_pe; + phb->unfreeze_pe = pnv_ioda_unfreeze_pe; + + /* Setup MSI support */ + pnv_pci_init_ioda_msis(phb); + + /* + * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here + * to let the PCI core do resource assignment. It's supposed + * that the PCI core will do correct I/O and MMIO alignment + * for the P2P bridge bars so that each PCI bus (excluding + * the child P2P bridges) can form individual PE. + */ + ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; + + switch (phb->type) { + case PNV_PHB_NPU_OCAPI: + hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; + break; + default: + hose->controller_ops = pnv_pci_ioda_controller_ops; + } + + ppc_md.pcibios_default_alignment = pnv_pci_default_alignment; + +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov; + ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; + ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; +#endif + + pci_add_flags(PCI_REASSIGN_ALL_RSRC); + + /* Reset IODA tables to a clean state */ + rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET); + if (rc) + pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc); + + /* + * If we're running in kdump kernel, the previous kernel never + * shutdown PCI devices correctly. We already got IODA table + * cleaned out. So we have to issue PHB reset to stop all PCI + * transactions from previous kernel. The ppc_pci_reset_phbs + * kernel parameter will force this reset too. Additionally, + * if the IODA reset above failed then use a bigger hammer. + * This can happen if we get a PHB fatal error in very early + * boot. + */ + if (is_kdump_kernel() || pci_reset_phbs || rc) { + pr_info(" Issue PHB reset ...\n"); + pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); + pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); + } + + /* Remove M64 resource if we can't configure it successfully */ + if (!phb->init_m64 || phb->init_m64(phb)) + hose->mem_resources[1].flags = 0; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(hose); +} + +void __init pnv_pci_init_ioda2_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); +} + +void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np) +{ + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI); +} + +static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); + + if (!machine_is(powernv)) + return; + + if (phb->type == PNV_PHB_NPU_OCAPI) + dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE; +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup); diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c new file mode 100644 index 0000000000..59882da3e7 --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -0,0 +1,760 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include + +#include "pci.h" + +/* + * The majority of the complexity in supporting SR-IOV on PowerNV comes from + * the need to put the MMIO space for each VF into a separate PE. Internally + * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table". + * The MBT historically only applied to the 64bit MMIO window of the PHB + * so it's common to see it referred to as the "M64BT". + * + * An MBT entry stores the mapped range as an , pair. This forces + * the address range that we want to map to be power-of-two sized and aligned. + * For conventional PCI devices this isn't really an issue since PCI device BARs + * have the same requirement. + * + * For a SR-IOV BAR things are a little more awkward since size and alignment + * are not coupled. The alignment is set based on the per-VF BAR size, but + * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs + * isn't necessarily a power of two, so neither is the total size. To fix that + * we need to finesse (read: hack) the Linux BAR allocator so that it will + * allocate the SR-IOV BARs in a way that lets us map them using the MBT. + * + * The changes to size and alignment that we need to do depend on the "mode" + * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above, + * so as a baseline we can assume that we have the following BAR modes + * available: + * + * NB: $PE_COUNT is the number of PEs that the PHB supports. + * + * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized + * segments. The n'th segment is mapped to the n'th PE. + * b) An un-segmented BAR that maps the whole address range to a specific PE. + * + * + * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR + * For comparison b) requires one entry per-VF per-BAR, or: + * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment + * to equal the size of the per-VF BAR area. So: + * + * new_size = per-vf-size * number-of-PEs + * + * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size + * to "new_size", calculated above. Implementing this is a convoluted process + * which requires several hooks in the PCI core: + * + * 1. In pcibios_device_add() we call pnv_pci_ioda_fixup_iov(). + * + * At this point the device has been probed and the device's BARs are sized, + * but no resource allocations have been done. The SR-IOV BARs are sized + * based on the maximum number of VFs supported by the device and we need + * to increase that to new_size. + * + * 2. Later, when Linux actually assigns resources it tries to make the resource + * allocations for each PCI bus as compact as possible. As a part of that it + * sorts the BARs on a bus by their required alignment, which is calculated + * using pci_resource_alignment(). + * + * For IOV resources this goes: + * pci_resource_alignment() + * pci_sriov_resource_alignment() + * pcibios_sriov_resource_alignment() + * pnv_pci_iov_resource_alignment() + * + * Our hook overrides the default alignment, equal to the per-vf-size, with + * new_size computed above. + * + * 3. When userspace enables VFs for a device: + * + * sriov_enable() + * pcibios_sriov_enable() + * pnv_pcibios_sriov_enable() + * + * This is where we actually allocate PE numbers for each VF and setup the + * MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena" + * where each MBT segment is equal in size to the VF BAR so we can shift + * around the actual SR-IOV BAR location within this arena. We need this + * ability because the PE space is shared by all devices on the same PHB. + * When using mode a) described above segment 0 in maps to PE#0 which might + * be already being used by another device on the PHB. + * + * As a result we need allocate a contigious range of PE numbers, then shift + * the address programmed into the SR-IOV BAR of the PF so that the address + * of VF0 matches up with the segment corresponding to the first allocated + * PE number. This is handled in pnv_pci_vf_resource_shift(). + * + * Once all that is done we return to the PCI core which then enables VFs, + * scans them and creates pci_devs for each. The init process for a VF is + * largely the same as a normal device, but the VF is inserted into the IODA + * PE that we allocated for it rather than the PE associated with the bus. + * + * 4. When userspace disables VFs we unwind the above in + * pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since + * we don't need to validate anything, just tear down the mappings and + * move SR-IOV resource back to its "proper" location. + * + * That's how mode a) works. In theory mode b) (single PE mapping) is less work + * since we can map each individual VF with a separate BAR. However, there's a + * few limitations: + * + * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes + * it only usable for devices with very large per-VF BARs. Such devices are + * similar to Big Foot. They definitely exist, but I've never seen one. + * + * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only + * 16 total and some are needed for. Most SR-IOV capable network cards can support + * more than 16 VFs on each port. + * + * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO + * window of the PHB. + * + * + * + * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It + * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows + * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since + * the Linux BAR allocation will place any BAR marked as non-prefetchable into + * the non-prefetchable bridge window, which is 32bit only. It also added two + * new modes: + * + * c) A segmented BAR similar to a), but each segment can be individually + * mapped to any PE. This is matches how the 32bit MMIO window worked on + * IODA1&2. + * + * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a), + * but with fewer segments and configurable base PE. + * + * i.e. The n'th segment maps to the (n + base)'th PE. + * + * The base PE is also required to be a multiple of the window size. + * + * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us + * to exploit any of the IODA3 features. + */ + +static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct resource *res; + int i; + resource_size_t vf_bar_sz; + struct pnv_iov_data *iov; + int mul; + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + goto disable_iov; + pdev->dev.archdata.iov_data = iov; + mul = phb->ioda.total_pe_num; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || res->parent) + continue; + if (!pnv_pci_is_m64_flags(res->flags)) { + dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n", + i, res); + goto disable_iov; + } + + vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + + /* + * Generally, one segmented M64 BAR maps one IOV BAR. However, + * if a VF BAR is too large we end up wasting a lot of space. + * If each VF needs more than 1/4 of the default m64 segment + * then each VF BAR should be mapped in single-PE mode to reduce + * the amount of space required. This does however limit the + * number of VFs we can support. + * + * The 1/4 limit is arbitrary and can be tweaked. + */ + if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) { + /* + * On PHB3, the minimum size alignment of M64 BAR in + * single mode is 32MB. If this VF BAR is smaller than + * 32MB, but still too large for a segmented window + * then we can't map it and need to disable SR-IOV for + * this device. + */ + if (vf_bar_sz < SZ_32M) { + pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n", + i, res); + goto disable_iov; + } + + iov->m64_single_mode[i] = true; + continue; + } + + /* + * This BAR can be mapped with one segmented window, so adjust + * te resource size to accommodate. + */ + pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res); + res->end = res->start + vf_bar_sz * mul - 1; + pci_dbg(pdev, " %pR\n", res); + + pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", + i, res, mul); + + iov->need_shift = true; + } + + return; + +disable_iov: + /* Save ourselves some MMIO space by disabling the unusable BARs */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } + + pdev->dev.archdata.iov_data = NULL; + kfree(iov); +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * (pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using it's M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} + +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + resource_size_t align = pci_iov_resource_size(pdev, resno); + struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus); + struct pnv_iov_data *iov = pnv_iov_get(pdev); + + /* + * iov can be null if we have an SR-IOV device with IOV BAR that can't + * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch). + * In that case we don't allow VFs to be enabled since one of their + * BARs would not be placed in the correct PE. + */ + if (!iov) + return align; + + /* + * If we're using single mode then we can just use the native VF BAR + * alignment. We validated that it's possible to use a single PE + * window above when we did the fixup. + */ + if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES]) + return align; + + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. + */ + return phb->ioda.total_pe_num * align; +} + +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int window_id; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) { + opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + + clear_bit(window_id, &phb->ioda.m64_bar_alloc); + } + + return 0; +} + + +/* + * PHB3 and beyond support segmented windows. The window's address range + * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1 + * mapping between PEs and segments. + */ +static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* unused */ + size); + if (rc) + goto out; + + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_SPLIT); +out: + if (rc) + pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc); + + return rc; +} + +static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb, + int pe_num, + int window_id, + resource_size_t start, + resource_size_t size) +{ + int64_t rc; + + /* + * The API for setting up m64 mmio windows seems to have been designed + * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed + * split of 8 equally sized segments each of which could individually + * assigned to a PE. + * + * The problem with this is that the API doesn't have any way to + * communicate the number of segments we want on a BAR. This wasn't + * a problem for p7-ioc since you didn't have a choice, but the + * single PE windows added in PHB3 don't map cleanly to this API. + * + * As a result we've got this slightly awkward process where we + * call opal_pci_map_pe_mmio_window() to put the single in single + * PE mode, and set the PE for the window before setting the address + * bounds. We need to do it this way because the single PE windows + * for PHB3 have different alignment requirements on PHB3. + */ + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe_num, + OPAL_M64_WINDOW_TYPE, + window_id, + 0); + if (rc) + goto out; + + /* + * NB: In single PE mode the window needs to be aligned to 32MB + */ + rc = opal_pci_set_phb_mem_window(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + start, + 0, /* ignored by FW, m64 is 1-1 */ + size); + if (rc) + goto out; + + /* + * Now actually enable it. We specified the BAR should be in "non-split" + * mode so FW will validate that the BAR is in single PE mode. + */ + rc = opal_pci_phb_mmio_enable(phb->opal_id, + OPAL_M64_WINDOW_TYPE, + window_id, + OPAL_ENABLE_M64_NON_SPLIT); +out: + if (rc) + pr_err("Error mapping single PE BAR\n"); + + return rc; +} + +static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov) +{ + int win; + + do { + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, + phb->ioda.m64_bar_idx + 1, 0); + + if (win >= phb->ioda.m64_bar_idx + 1) + return -1; + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); + + set_bit(win, iov->used_m64_bar_mask); + + return win; +} + +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int win; + struct resource *res; + int i, j; + int64_t rc; + resource_size_t size, start; + int base_pe_num; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + + /* don't need single mode? map everything in one go! */ + if (!iov->m64_single_mode[i]) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + size = resource_size(res); + start = res->start; + + rc = pnv_ioda_map_m64_segmented(phb, win, start, size); + if (rc) + goto m64_failed; + + continue; + } + + /* otherwise map each VF with single PE BARs */ + size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); + base_pe_num = iov->vf_pe_arr[0].pe_number; + + for (j = 0; j < num_vfs; j++) { + win = pnv_pci_alloc_m64_bar(phb, iov); + if (win < 0) + goto m64_failed; + + start = res->start + size * j; + rc = pnv_ioda_map_m64_single(phb, win, + base_pe_num + j, + start, + size); + if (rc) + goto m64_failed; + } + } + return 0; + +m64_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + return -EBUSY; +} + +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe, *pe_n; + + phb = pci_bus_to_pnvhb(pdev->bus); + + if (!pdev->is_physfn) + return; + + /* FIXME: Use pnv_ioda_release_pe()? */ + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { + if (pe->parent_dev != pdev) + continue; + + pnv_pci_ioda2_release_pe_dma(pe); + + /* Remove from list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + pnv_ioda_deconfigure_pe(phb, pe); + + pnv_ioda_free_pe(pe); + } +} + +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) +{ + struct resource *res, res2; + struct pnv_iov_data *iov; + resource_size_t size; + u16 num_vfs; + int i; + + if (!dev->is_physfn) + return -EINVAL; + iov = pnv_iov_get(dev); + + /* + * "offset" is in VFs. The M64 windows are sized so that when they + * are segmented, each segment is the same size as the IOV BAR. + * Each segment is in a separate PE, and the high order bits of the + * address are the PE number. Therefore, each VF's BAR is in a + * separate PE, and changing the IOV BAR start address changes the + * range of PEs the VFs are in. + */ + num_vfs = iov->num_vfs; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + /* + * The actual IOV BAR range is determined by the start address + * and the actual size for num_vfs VFs BAR. This check is to + * make sure that after shifting, the range will not overlap + * with another device. + */ + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2.flags = res->flags; + res2.start = res->start + (size * offset); + res2.end = res2.start + (size * num_vfs) - 1; + + if (res2.end > res->end) { + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", + i, &res2, res, num_vfs, offset); + return -EBUSY; + } + } + + /* + * Since M64 BAR shares segments among all possible 256 PEs, + * we have to shift the beginning of PF IOV BAR to make it start from + * the segment which belongs to the PE number assigned to the first VF. + * This creates a "hole" in the /proc/iomem which could be used for + * allocating other resources so we reserve this area below and + * release when IOV is released. + */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &dev->resource[i + PCI_IOV_RESOURCES]; + if (!res->flags || !res->parent) + continue; + if (iov->m64_single_mode[i]) + continue; + + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); + res2 = *res; + res->start += size * offset; + + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n", + i, &res2, res, (offset > 0) ? "En" : "Dis", + num_vfs, offset); + + if (offset < 0) { + devm_release_resource(&dev->dev, &iov->holes[i]); + memset(&iov->holes[i], 0, sizeof(iov->holes[i])); + } + + pci_update_resource(dev, i + PCI_IOV_RESOURCES); + + if (offset > 0) { + iov->holes[i].start = res2.start; + iov->holes[i].end = res2.start + size * offset - 1; + iov->holes[i].flags = IORESOURCE_BUS; + iov->holes[i].name = "pnv_iov_reserved"; + devm_request_resource(&dev->dev, res->parent, + &iov->holes[i]); + } + } + return 0; +} + +static void pnv_pci_sriov_disable(struct pci_dev *pdev) +{ + u16 num_vfs, base_pe; + struct pnv_iov_data *iov; + + iov = pnv_iov_get(pdev); + if (WARN_ON(!iov)) + return; + + num_vfs = iov->num_vfs; + base_pe = iov->vf_pe_arr[0].pe_number; + + /* Release VF PEs */ + pnv_ioda_release_vf_PE(pdev); + + /* Un-shift the IOV BARs if we need to */ + if (iov->need_shift) + pnv_pci_vf_resource_shift(pdev, -base_pe); + + /* Release M64 windows */ + pnv_pci_vf_release_m64(pdev, num_vfs); +} + +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + int pe_num; + u16 vf_index; + struct pnv_iov_data *iov; + struct pci_dn *pdn; + + if (!pdev->is_physfn) + return; + + phb = pci_bus_to_pnvhb(pdev->bus); + pdn = pci_get_pdn(pdev); + iov = pnv_iov_get(pdev); + + /* Reserve PE for each VF */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + + pe = &iov->vf_pe_arr[vf_index]; + pe->phb = phb; + pe->flags = PNV_IODA_PE_VF; + pe->pbus = NULL; + pe->parent_dev = pdev; + pe->mve_number = -1; + pe->rid = (vf_bus << 8) | vf_devfn; + + pe_num = pe->pe_number; + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", + pci_domain_nr(pdev->bus), pdev->bus->number, + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); + + if (pnv_ioda_configure_pe(phb, pe)) { + /* XXX What do we do here ? */ + pnv_ioda_free_pe(pe); + pe->pdev = NULL; + continue; + } + + /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); + list_add_tail(&pe->list, &phb->ioda.pe_list); + mutex_unlock(&phb->ioda.pe_list_mutex); + + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + + pnv_pci_ioda2_setup_dma_pe(phb, pe); + } +} + +static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pnv_ioda_pe *base_pe; + struct pnv_iov_data *iov; + struct pnv_phb *phb; + int ret; + u16 i; + + phb = pci_bus_to_pnvhb(pdev->bus); + iov = pnv_iov_get(pdev); + + /* + * There's a calls to IODA2 PE setup code littered throughout. We could + * probably fix that, but we'd still have problems due to the + * restriction inherent on IODA1 PHBs. + * + * NB: We class IODA3 as IODA2 since they're very similar. + */ + if (phb->type != PNV_PHB_IODA2) { + pci_err(pdev, "SR-IOV is not supported on this PHB\n"); + return -ENXIO; + } + + if (!iov) { + dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + + /* allocate a contiguous block of PEs for our VFs */ + base_pe = pnv_ioda_alloc_pe(phb, num_vfs); + if (!base_pe) { + pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs); + return -EBUSY; + } + + iov->vf_pe_arr = base_pe; + iov->num_vfs = num_vfs; + + /* Assign M64 window accordingly */ + ret = pnv_pci_vf_assign_m64(pdev, num_vfs); + if (ret) { + dev_info(&pdev->dev, "Not enough M64 window resources\n"); + goto m64_failed; + } + + /* + * When using one M64 BAR to map one IOV BAR, we need to shift + * the IOV BAR according to the PE# allocated to the VFs. + * Otherwise, the PE# for the VF will conflict with others. + */ + if (iov->need_shift) { + ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number); + if (ret) + goto shift_failed; + } + + /* Setup VF PEs */ + pnv_ioda_setup_vf_PE(pdev, num_vfs); + + return 0; + +shift_failed: + pnv_pci_vf_release_m64(pdev, num_vfs); + +m64_failed: + for (i = 0; i < num_vfs; i++) + pnv_ioda_free_pe(&iov->vf_pe_arr[i]); + + return ret; +} + +int pnv_pcibios_sriov_disable(struct pci_dev *pdev) +{ + pnv_pci_sriov_disable(pdev); + + /* Release PCI data */ + remove_sriov_vf_pdns(pdev); + return 0; +} + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_sriov_vf_pdns(pdev); + + return pnv_pci_sriov_enable(pdev, num_vfs); +} diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c new file mode 100644 index 0000000000..35f566aa04 --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci.c @@ -0,0 +1,862 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Support PCI/PCIe on PowerNV platforms + * + * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powernv.h" +#include "pci.h" + +static DEFINE_MUTEX(tunnel_mutex); + +int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) +{ + struct device_node *node = np; + u32 bdfn; + u64 phbid; + int ret; + + ret = of_property_read_u32(np, "reg", &bdfn); + if (ret) + return -ENXIO; + + bdfn = ((bdfn & 0x00ffff00) >> 8); + for (node = np; node; node = of_get_parent(node)) { + if (!PCI_DN(node)) { + of_node_put(node); + break; + } + + if (!of_device_is_compatible(node, "ibm,ioda2-phb") && + !of_device_is_compatible(node, "ibm,ioda3-phb") && + !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) { + of_node_put(node); + continue; + } + + ret = of_property_read_u64(node, "ibm,opal-phbid", &phbid); + if (ret) { + of_node_put(node); + return -ENXIO; + } + + if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) + *id = PCI_PHB_SLOT_ID(phbid); + else + *id = PCI_SLOT_ID(phbid, bdfn); + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_slot_id); + +int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len) +{ + int64_t rc; + + if (!opal_check_token(OPAL_GET_DEVICE_TREE)) + return -ENXIO; + + rc = opal_get_device_tree(phandle, (uint64_t)buf, len); + if (rc < OPAL_SUCCESS) + return -EIO; + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_device_tree); + +int pnv_pci_get_presence_state(uint64_t id, uint8_t *state) +{ + int64_t rc; + + if (!opal_check_token(OPAL_PCI_GET_PRESENCE_STATE)) + return -ENXIO; + + rc = opal_pci_get_presence_state(id, (uint64_t)state); + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_presence_state); + +int pnv_pci_get_power_state(uint64_t id, uint8_t *state) +{ + int64_t rc; + + if (!opal_check_token(OPAL_PCI_GET_POWER_STATE)) + return -ENXIO; + + rc = opal_pci_get_power_state(id, (uint64_t)state); + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_power_state); + +int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg) +{ + struct opal_msg m; + int token, ret; + int64_t rc; + + if (!opal_check_token(OPAL_PCI_SET_POWER_STATE)) + return -ENXIO; + + token = opal_async_get_token_interruptible(); + if (unlikely(token < 0)) + return token; + + rc = opal_pci_set_power_state(token, id, (uint64_t)&state); + if (rc == OPAL_SUCCESS) { + ret = 0; + goto exit; + } else if (rc != OPAL_ASYNC_COMPLETION) { + ret = -EIO; + goto exit; + } + + ret = opal_async_wait_response(token, &m); + if (ret < 0) + goto exit; + + if (msg) { + ret = 1; + memcpy(msg, &m, sizeof(m)); + } + +exit: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); + +/* Nicely print the contents of the PE State Tables (PEST). */ +static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size) +{ + __be64 prevA = ULONG_MAX, prevB = ULONG_MAX; + bool dup = false; + int i; + + for (i = 0; i < pest_size; i++) { + __be64 peA = be64_to_cpu(pestA[i]); + __be64 peB = be64_to_cpu(pestB[i]); + + if (peA != prevA || peB != prevB) { + if (dup) { + pr_info("PE[..%03x] A/B: as above\n", i-1); + dup = false; + } + prevA = peA; + prevB = peB; + if (peA & PNV_IODA_STOPPED_STATE || + peB & PNV_IODA_STOPPED_STATE) + pr_info("PE[%03x] A/B: %016llx %016llx\n", + i, peA, peB); + } else if (!dup && (peA & PNV_IODA_STOPPED_STATE || + peB & PNV_IODA_STOPPED_STATE)) { + dup = true; + } + } +} + +static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoP7IOCPhbErrorData *data; + + data = (struct OpalIoP7IOCPhbErrorData *)common; + pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n", + hose->global_number, be32_to_cpu(common->version)); + + if (data->brdgCtl) + pr_info("brdgCtl: %08x\n", + be32_to_cpu(data->brdgCtl)); + if (data->portStatusReg || data->rootCmplxStatus || + data->busAgentStatus) + pr_info("UtlSts: %08x %08x %08x\n", + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info("RootSts: %08x %08x %08x %08x %08x\n", + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info("RootErrSts: %08x %08x %08x\n", + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info("RootErrLog: %08x %08x %08x %08x\n", + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); + if (data->sourceId || data->errorClass || + data->correlator) + pr_info("RootErrLog1: %08x %016llx %016llx\n", + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); + if (data->p7iocPlssr || data->p7iocCsr) + pr_info("PhbSts: %016llx %016llx\n", + be64_to_cpu(data->p7iocPlssr), + be64_to_cpu(data->p7iocCsr)); + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); + if (data->mmioErrorStatus) + pr_info("OutErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); + if (data->dma0ErrorStatus) + pr_info("InAErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); + if (data->dma1ErrorStatus) + pr_info("InBErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); + + pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS); +} + +static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoPhb3ErrorData *data; + + data = (struct OpalIoPhb3ErrorData*)common; + pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n", + hose->global_number, be32_to_cpu(common->version)); + if (data->brdgCtl) + pr_info("brdgCtl: %08x\n", + be32_to_cpu(data->brdgCtl)); + if (data->portStatusReg || data->rootCmplxStatus || + data->busAgentStatus) + pr_info("UtlSts: %08x %08x %08x\n", + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info("RootSts: %08x %08x %08x %08x %08x\n", + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info("RootErrSts: %08x %08x %08x\n", + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info("RootErrLog: %08x %08x %08x %08x\n", + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); + if (data->sourceId || data->errorClass || + data->correlator) + pr_info("RootErrLog1: %08x %016llx %016llx\n", + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); + if (data->nFir) + pr_info("nFir: %016llx %016llx %016llx\n", + be64_to_cpu(data->nFir), + be64_to_cpu(data->nFirMask), + be64_to_cpu(data->nFirWOF)); + if (data->phbPlssr || data->phbCsr) + pr_info("PhbSts: %016llx %016llx\n", + be64_to_cpu(data->phbPlssr), + be64_to_cpu(data->phbCsr)); + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); + if (data->mmioErrorStatus) + pr_info("OutErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); + if (data->dma0ErrorStatus) + pr_info("InAErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); + if (data->dma1ErrorStatus) + pr_info("InBErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); + + pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS); +} + +static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoPhb4ErrorData *data; + + data = (struct OpalIoPhb4ErrorData*)common; + pr_info("PHB4 PHB#%d Diag-data (Version: %d)\n", + hose->global_number, be32_to_cpu(common->version)); + if (data->brdgCtl) + pr_info("brdgCtl: %08x\n", + be32_to_cpu(data->brdgCtl)); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info("RootSts: %08x %08x %08x %08x %08x\n", + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info("RootErrSts: %08x %08x %08x\n", + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info("RootErrLog: %08x %08x %08x %08x\n", + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); + if (data->sourceId) + pr_info("sourceId: %08x\n", be32_to_cpu(data->sourceId)); + if (data->nFir) + pr_info("nFir: %016llx %016llx %016llx\n", + be64_to_cpu(data->nFir), + be64_to_cpu(data->nFirMask), + be64_to_cpu(data->nFirWOF)); + if (data->phbPlssr || data->phbCsr) + pr_info("PhbSts: %016llx %016llx\n", + be64_to_cpu(data->phbPlssr), + be64_to_cpu(data->phbCsr)); + if (data->lemFir) + pr_info("Lem: %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); + if (data->phbErrorStatus) + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); + if (data->phbTxeErrorStatus) + pr_info("PhbTxeErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbTxeErrorStatus), + be64_to_cpu(data->phbTxeFirstErrorStatus), + be64_to_cpu(data->phbTxeErrorLog0), + be64_to_cpu(data->phbTxeErrorLog1)); + if (data->phbRxeArbErrorStatus) + pr_info("RxeArbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbRxeArbErrorStatus), + be64_to_cpu(data->phbRxeArbFirstErrorStatus), + be64_to_cpu(data->phbRxeArbErrorLog0), + be64_to_cpu(data->phbRxeArbErrorLog1)); + if (data->phbRxeMrgErrorStatus) + pr_info("RxeMrgErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbRxeMrgErrorStatus), + be64_to_cpu(data->phbRxeMrgFirstErrorStatus), + be64_to_cpu(data->phbRxeMrgErrorLog0), + be64_to_cpu(data->phbRxeMrgErrorLog1)); + if (data->phbRxeTceErrorStatus) + pr_info("RxeTceErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbRxeTceErrorStatus), + be64_to_cpu(data->phbRxeTceFirstErrorStatus), + be64_to_cpu(data->phbRxeTceErrorLog0), + be64_to_cpu(data->phbRxeTceErrorLog1)); + + if (data->phbPblErrorStatus) + pr_info("PblErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbPblErrorStatus), + be64_to_cpu(data->phbPblFirstErrorStatus), + be64_to_cpu(data->phbPblErrorLog0), + be64_to_cpu(data->phbPblErrorLog1)); + if (data->phbPcieDlpErrorStatus) + pr_info("PcieDlp: %016llx %016llx %016llx\n", + be64_to_cpu(data->phbPcieDlpErrorLog1), + be64_to_cpu(data->phbPcieDlpErrorLog2), + be64_to_cpu(data->phbPcieDlpErrorStatus)); + if (data->phbRegbErrorStatus) + pr_info("RegbErr: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->phbRegbErrorStatus), + be64_to_cpu(data->phbRegbFirstErrorStatus), + be64_to_cpu(data->phbRegbErrorLog0), + be64_to_cpu(data->phbRegbErrorLog1)); + + + pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS); +} + +void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, + unsigned char *log_buff) +{ + struct OpalIoPhbErrorCommon *common; + + if (!hose || !log_buff) + return; + + common = (struct OpalIoPhbErrorCommon *)log_buff; + switch (be32_to_cpu(common->ioType)) { + case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: + pnv_pci_dump_p7ioc_diag_data(hose, common); + break; + case OPAL_PHB_ERROR_DATA_TYPE_PHB3: + pnv_pci_dump_phb3_diag_data(hose, common); + break; + case OPAL_PHB_ERROR_DATA_TYPE_PHB4: + pnv_pci_dump_phb4_diag_data(hose, common); + break; + default: + pr_warn("%s: Unrecognized ioType %d\n", + __func__, be32_to_cpu(common->ioType)); + } +} + +static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) +{ + unsigned long flags, rc; + int has_diag, ret = 0; + + spin_lock_irqsave(&phb->lock, flags); + + /* Fetch PHB diag-data */ + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, + phb->diag_data_size); + has_diag = (rc == OPAL_SUCCESS); + + /* If PHB supports compound PE, to handle it */ + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc) { + pr_warn("%s: Failure %ld clearing frozen " + "PHB#%x-PE#%x\n", + __func__, rc, phb->hose->global_number, + pe_no); + ret = -EIO; + } + } + + /* + * For now, let's only display the diag buffer when we fail to clear + * the EEH status. We'll do more sensible things later when we have + * proper EEH support. We need to make sure we don't pollute ourselves + * with the normal errors generated when probing empty slots + */ + if (has_diag && ret) + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data); + + spin_unlock_irqrestore(&phb->lock, flags); +} + +static void pnv_pci_config_check_eeh(struct pci_dn *pdn) +{ + struct pnv_phb *phb = pdn->phb->private_data; + u8 fstate = 0; + __be16 pcierr = 0; + unsigned int pe_no; + s64 rc; + + /* + * Get the PE#. During the PCI probe stage, we might not + * setup that yet. So all ER errors should be mapped to + * reserved PE. + */ + pe_no = pdn->pe_number; + if (pe_no == IODA_INVALID_PE) { + pe_no = phb->ioda.reserved_pe_idx; + } + + /* + * Fetch frozen state. If the PHB support compound PE, + * we need handle that case. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe_no); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe_no, + &fstate, + &pcierr, + NULL); + if (rc) { + pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n", + __func__, rc, phb->hose->global_number, pe_no); + return; + } + } + + pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n", + (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); + + /* Clear the frozen state if applicable */ + if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || + fstate == OPAL_EEH_STOPPED_DMA_FREEZE || + fstate == OPAL_EEH_STOPPED_MMIO_DMA_FREEZE) { + /* + * If PHB supports compound PE, freeze it for + * consistency. + */ + if (phb->freeze_pe) + phb->freeze_pe(phb, pe_no); + + pnv_pci_handle_eeh_config(phb, pe_no); + } +} + +int pnv_pci_cfg_read(struct pci_dn *pdn, + int where, int size, u32 *val) +{ + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; + s64 rc; + + switch (size) { + case 1: { + u8 v8; + rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where, &v8); + *val = (rc == OPAL_SUCCESS) ? v8 : 0xff; + break; + } + case 2: { + __be16 v16; + rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where, + &v16); + *val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff; + break; + } + case 4: { + __be32 v32; + rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32); + *val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff; + break; + } + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, *val); + return PCIBIOS_SUCCESSFUL; +} + +int pnv_pci_cfg_write(struct pci_dn *pdn, + int where, int size, u32 val) +{ + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; + + pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, val); + switch (size) { + case 1: + opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); + break; + case 2: + opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val); + break; + case 4: + opal_pci_config_write_word(phb->opal_id, bdfn, where, val); + break; + default: + return PCIBIOS_FUNC_NOT_SUPPORTED; + } + + return PCIBIOS_SUCCESSFUL; +} + +#ifdef CONFIG_EEH +static bool pnv_pci_cfg_check(struct pci_dn *pdn) +{ + struct eeh_dev *edev = NULL; + struct pnv_phb *phb = pdn->phb->private_data; + + /* EEH not enabled ? */ + if (!(phb->flags & PNV_PHB_FLAG_EEH)) + return true; + + /* PE reset or device removed ? */ + edev = pdn->edev; + if (edev) { + if (edev->pe && + (edev->pe->state & EEH_PE_CFG_BLOCKED)) + return false; + + if (edev->mode & EEH_DEV_REMOVED) + return false; + } + + return true; +} +#else +static inline pnv_pci_cfg_check(struct pci_dn *pdn) +{ + return true; +} +#endif /* CONFIG_EEH */ + +static int pnv_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct pci_dn *pdn; + struct pnv_phb *phb; + int ret; + + *val = 0xFFFFFFFF; + pdn = pci_get_pdn_by_devfn(bus, devfn); + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!pnv_pci_cfg_check(pdn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + ret = pnv_pci_cfg_read(pdn, where, size, val); + phb = pdn->phb->private_data; + if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) { + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(pdn->edev)) + return PCIBIOS_DEVICE_NOT_FOUND; + } else { + pnv_pci_config_check_eeh(pdn); + } + + return ret; +} + +static int pnv_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct pci_dn *pdn; + struct pnv_phb *phb; + int ret; + + pdn = pci_get_pdn_by_devfn(bus, devfn); + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (!pnv_pci_cfg_check(pdn)) + return PCIBIOS_DEVICE_NOT_FOUND; + + ret = pnv_pci_cfg_write(pdn, where, size, val); + phb = pdn->phb->private_data; + if (!(phb->flags & PNV_PHB_FLAG_EEH)) + pnv_pci_config_check_eeh(pdn); + + return ret; +} + +struct pci_ops pnv_pci_ops = { + .read = pnv_pci_read_config, + .write = pnv_pci_write_config, +}; + +struct iommu_table *pnv_pci_table_alloc(int nid) +{ + struct iommu_table *tbl; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid); + if (!tbl) + return NULL; + + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + kref_init(&tbl->it_kref); + + return tbl; +} + +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return of_node_get(hose->dn); +} +EXPORT_SYMBOL(pnv_pci_get_phb_node); + +int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) +{ + struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus); + u64 tunnel_bar; + __be64 val; + int rc; + + if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) + return -ENXIO; + if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) + return -ENXIO; + + mutex_lock(&tunnel_mutex); + rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + tunnel_bar = be64_to_cpu(val); + if (enable) { + /* + * Only one device per PHB can use atomics. + * Our policy is first-come, first-served. + */ + if (tunnel_bar) { + if (tunnel_bar != addr) + rc = -EBUSY; + else + rc = 0; /* Setting same address twice is ok */ + goto out; + } + } else { + /* + * The device that owns atomics and wants to release + * them must pass the same address with enable == 0. + */ + if (tunnel_bar != addr) { + rc = -EPERM; + goto out; + } + addr = 0x0ULL; + } + rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr); + rc = opal_error_code(rc); +out: + mutex_unlock(&tunnel_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); + +void pnv_pci_shutdown(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) + if (hose->controller_ops.shutdown) + hose->controller_ops.shutdown(hose); +} + +/* Fixup wrong class code in p7ioc and p8 root complex */ +static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) +{ + dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); + +void __init pnv_pci_init(void) +{ + struct device_node *np; + + pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); + + /* If we don't have OPAL, eg. in sim, just skip PCI probe */ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return; + +#ifdef CONFIG_PCIEPORTBUS + /* + * On PowerNV PCIe devices are (currently) managed in cooperation + * with firmware. This isn't *strictly* required, but there's enough + * assumptions baked into both firmware and the platform code that + * it's unwise to allow the portbus services to be used. + * + * We need to fix this eventually, but for now set this flag to disable + * the portbus driver. The AER service isn't required since that AER + * events are handled via EEH. The pciehp hotplug driver can't work + * without kernel changes (and portbus binding breaks pnv_php). The + * other services also require some thinking about how we're going + * to integrate them. + */ + pcie_ports_disabled = true; +#endif + + /* Look for ioda2 built-in PHB3's */ + for_each_compatible_node(np, NULL, "ibm,ioda2-phb") + pnv_pci_init_ioda2_phb(np); + + /* Look for ioda3 built-in PHB4's, we treat them as IODA2 */ + for_each_compatible_node(np, NULL, "ibm,ioda3-phb") + pnv_pci_init_ioda2_phb(np); + + /* Look for NPU2 OpenCAPI PHBs */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_pci_init_npu2_opencapi_phb(np); + + /* Configure IOMMU DMA hooks */ + set_pci_dma_ops(&dma_iommu_ops); +} diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h new file mode 100644 index 0000000000..957f2b47a3 --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci.h @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __POWERNV_PCI_H +#define __POWERNV_PCI_H + +#include /* for __printf */ +#include +#include +#include + +struct pci_dn; + +enum pnv_phb_type { + PNV_PHB_IODA2, + PNV_PHB_NPU_OCAPI, +}; + +/* Precise PHB model for error management */ +enum pnv_phb_model { + PNV_PHB_MODEL_UNKNOWN, + PNV_PHB_MODEL_P7IOC, + PNV_PHB_MODEL_PHB3, +}; + +#define PNV_PCI_DIAG_BUF_SIZE 8192 +#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ +#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ +#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ +#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ +#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ +#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ + +/* + * A brief note on PNV_IODA_PE_BUS_ALL + * + * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses + * the Requester ID field of the PCIe request header to determine the device + * (and PE) that initiated a DMA. In legacy PCI individual memory read/write + * requests aren't tagged with the RID. To work around this the PCIe-to-PCI + * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side. + * + * PCIe-to-X bridges have a similar issue even though PCI-X requests also have + * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take + * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe + * side of the bridge. + * + * To work around these problems we use the BUS_ALL flag since every subordinate + * bus of the bridge should go into the same PE. + */ + +/* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */ +#define PNV_IODA_STOPPED_STATE 0x8000000000000000 + +/* Data associated with a PE, including IOMMU tracking etc.. */ +struct pnv_phb; +struct pnv_ioda_pe { + unsigned long flags; + struct pnv_phb *phb; + int device_count; + + /* A PE can be associated with a single device or an + * entire bus (& children). In the former case, pdev + * is populated, in the later case, pbus is. + */ +#ifdef CONFIG_PCI_IOV + struct pci_dev *parent_dev; +#endif + struct pci_dev *pdev; + struct pci_bus *pbus; + + /* Effective RID (device RID for a device PE and base bus + * RID with devfn 0 for a bus PE) + */ + unsigned int rid; + + /* PE number */ + unsigned int pe_number; + + /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ + struct iommu_table_group table_group; + + /* 64-bit TCE bypass region */ + bool tce_bypass_enabled; + uint64_t tce_bypass_base; + + /* + * Used to track whether we've done DMA setup for this PE or not. We + * want to defer allocating TCE tables, etc until we've added a + * non-bridge device to the PE. + */ + bool dma_setup_done; + + /* MSIs. MVE index is identical for 32 and 64 bit MSI + * and -1 if not supported. (It's actually identical to the + * PE number) + */ + int mve_number; + + /* PEs in compound case */ + struct pnv_ioda_pe *master; + struct list_head slaves; + + /* Link in list of PE#s */ + struct list_head list; +}; + +#define PNV_PHB_FLAG_EEH (1 << 0) + +struct pnv_phb { + struct pci_controller *hose; + enum pnv_phb_type type; + enum pnv_phb_model model; + u64 hub_id; + u64 opal_id; + int flags; + void __iomem *regs; + u64 regs_phys; + spinlock_t lock; + +#ifdef CONFIG_DEBUG_FS + int has_dbgfs; + struct dentry *dbgfs; +#endif + + unsigned int msi_base; + struct msi_bitmap msi_bmp; + int (*init_m64)(struct pnv_phb *phb); + int (*get_pe_state)(struct pnv_phb *phb, int pe_no); + void (*freeze_pe)(struct pnv_phb *phb, int pe_no); + int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); + + struct { + /* Global bridge info */ + unsigned int total_pe_num; + unsigned int reserved_pe_idx; + unsigned int root_pe_idx; + + /* 32-bit MMIO window */ + unsigned int m32_size; + unsigned int m32_segsize; + unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned long m64_segsize; + unsigned long m64_base; +#define MAX_M64_BARS 64 + unsigned long m64_bar_alloc; + + /* IO ports */ + unsigned int io_size; + unsigned int io_segsize; + unsigned int io_pci_base; + + /* PE allocation */ + struct mutex pe_alloc_mutex; + unsigned long *pe_alloc; + struct pnv_ioda_pe *pe_array; + + /* M32 & IO segment maps */ + unsigned int *m64_segmap; + unsigned int *m32_segmap; + unsigned int *io_segmap; + + /* IRQ chip */ + int irq_chip_init; + struct irq_chip irq_chip; + + /* Sorted list of used PE's based + * on the sequence of creation + */ + struct list_head pe_list; + struct mutex pe_list_mutex; + + /* Reverse map of PEs, indexed by {bus, devfn} */ + unsigned int pe_rmap[0x10000]; + } ioda; + + /* PHB and hub diagnostics */ + unsigned int diag_data_size; + u8 *diag_data; +}; + + +/* IODA PE management */ + +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) +{ + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); +} + +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; +} + +int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); + +void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe); +void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe); + +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count); +void pnv_ioda_free_pe(struct pnv_ioda_pe *pe); + +#ifdef CONFIG_PCI_IOV +/* + * For SR-IOV we want to put each VF's MMIO resource in to a separate PE. + * This requires a bit of acrobatics with the MMIO -> PE configuration + * and this structure is used to keep track of it all. + */ +struct pnv_iov_data { + /* number of VFs enabled */ + u16 num_vfs; + + /* pointer to the array of VF PEs. num_vfs long*/ + struct pnv_ioda_pe *vf_pe_arr; + + /* Did we map the VF BAR with single-PE IODA BARs? */ + bool m64_single_mode[PCI_SRIOV_NUM_BARS]; + + /* + * True if we're using any segmented windows. In that case we need + * shift the start of the IOV resource the segment corresponding to + * the allocated PE. + */ + bool need_shift; + + /* + * Bit mask used to track which m64 windows are used to map the + * SR-IOV BARs for this device. + */ + DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS); + + /* + * If we map the SR-IOV BARs with a segmented window then + * parts of that window will be "claimed" by other PEs. + * + * "holes" here is used to reserve the leading portion + * of the window that is used by other (non VF) PEs. + */ + struct resource holes[PCI_SRIOV_NUM_BARS]; +}; + +static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev) +{ + return pdev->dev.archdata.iov_data; +} + +void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev); +resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno); + +int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs); +int pnv_pcibios_sriov_disable(struct pci_dev *pdev); +#endif /* CONFIG_PCI_IOV */ + +extern struct pci_ops pnv_pci_ops; + +void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, + unsigned char *log_buff); +int pnv_pci_cfg_read(struct pci_dn *pdn, + int where, int size, u32 *val); +int pnv_pci_cfg_write(struct pci_dn *pdn, + int where, int size, u32 val); +extern struct iommu_table *pnv_pci_table_alloc(int nid); + +extern void pnv_pci_init_ioda_hub(struct device_node *np); +extern void pnv_pci_init_ioda2_phb(struct device_node *np); +extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); +extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); +extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); + +extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn); +extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); +extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); +extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels); +extern int pnv_eeh_post_init(void); + +__printf(3, 4) +extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, + const char *fmt, ...); +#define pe_err(pe, fmt, ...) \ + pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__) +#define pe_warn(pe, fmt, ...) \ + pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) +#define pe_info(pe, fmt, ...) \ + pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) + +/* pci-ioda-tce.c */ +#define POWERNV_IOMMU_DEFAULT_LEVELS 2 +#define POWERNV_IOMMU_MAX_LEVELS 5 + +extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + unsigned long attrs); +extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages); +extern int pnv_tce_xchg(struct iommu_table *tbl, long index, + unsigned long *hpa, enum dma_data_direction *direction); +extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, + bool alloc); +extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index); + +extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, + __u32 page_shift, __u64 window_size, __u32 levels, + bool alloc_userspace_copy, struct iommu_table *tbl); +extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); + +extern long pnv_pci_link_table_and_group(int node, int num, + struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, + struct iommu_table_group *table_group); +extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, + void *tce_mem, u64 tce_size, + u64 dma_offset, unsigned int page_shift); + +extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb); + +static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus) +{ + struct pci_controller *hose = bus->sysdata; + + if (hose) + return hose->private_data; + + return NULL; +} + +#endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h new file mode 100644 index 0000000000..866efdc103 --- /dev/null +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _POWERNV_H +#define _POWERNV_H + +/* + * There's various hacks scattered throughout the generic powerpc arch code + * that needs to call into powernv platform stuff. The prototypes for those + * functions are in asm/powernv.h + */ +#include + +#ifdef CONFIG_SMP +extern void pnv_smp_init(void); +#else +static inline void pnv_smp_init(void) { } +#endif + +extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn; + +struct pci_dev; + +#ifdef CONFIG_PCI +extern void pnv_pci_init(void); +extern void pnv_pci_shutdown(void); +#else +static inline void pnv_pci_init(void) { } +static inline void pnv_pci_shutdown(void) { } +#endif + +extern u32 pnv_get_supported_cpuidle_states(void); + +extern void pnv_lpc_init(void); + +extern void opal_handle_events(void); +extern bool opal_have_pending_events(void); +extern void opal_event_shutdown(void); + +bool cpu_core_split_required(void); + +struct memcons; +ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); +u32 __init memcons_get_size(struct memcons *mc); +struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name); + +void pnv_rng_init(void); + +#endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c new file mode 100644 index 0000000000..196aa70fe0 --- /dev/null +++ b/arch/powerpc/platforms/powernv/rng.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +#define pr_fmt(fmt) "powernv-rng: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "powernv.h" + +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul + +struct pnv_rng { + void __iomem *regs; + void __iomem *regs_real; + unsigned long mask; +}; + +static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng); + +static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val) +{ + unsigned long parity; + + /* Calculate the parity of the value */ + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); + + /* xor our value with the previous mask */ + val ^= rng->mask; + + /* update the mask based on the parity of this value */ + rng->mask = (rng->mask << 1) | (parity & 1); + + return val; +} + +static int pnv_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static int __init initialise_darn(void) +{ + unsigned long val; + int i; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + + for (i = 0; i < 10; i++) { + if (pnv_get_random_darn(&val)) { + ppc_md.get_random_seed = pnv_get_random_darn; + return 0; + } + } + return -EIO; +} + +int pnv_get_random_long(unsigned long *v) +{ + struct pnv_rng *rng; + + if (mfmsr() & MSR_DR) { + rng = get_cpu_var(pnv_rng); + *v = rng_whiten(rng, in_be64(rng->regs)); + put_cpu_var(rng); + } else { + rng = raw_cpu_read(pnv_rng); + *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); + } + return 1; +} +EXPORT_SYMBOL_GPL(pnv_get_random_long); + +static __init void rng_init_per_cpu(struct pnv_rng *rng, + struct device_node *dn) +{ + int chip_id, cpu; + + chip_id = of_get_ibm_chip_id(dn); + if (chip_id == -1) + pr_warn("No ibm,chip-id found for %pOF.\n", dn); + + for_each_possible_cpu(cpu) { + if (per_cpu(pnv_rng, cpu) == NULL || + cpu_to_chip_id(cpu) == chip_id) { + per_cpu(pnv_rng, cpu) = rng; + } + } +} + +static __init int rng_create(struct device_node *dn) +{ + struct pnv_rng *rng; + struct resource res; + unsigned long val; + + rng = kzalloc(sizeof(*rng), GFP_KERNEL); + if (!rng) + return -ENOMEM; + + if (of_address_to_resource(dn, 0, &res)) { + kfree(rng); + return -ENXIO; + } + + rng->regs_real = (void __iomem *)res.start; + + rng->regs = of_iomap(dn, 0); + if (!rng->regs) { + kfree(rng); + return -ENXIO; + } + + val = in_be64(rng->regs); + rng->mask = val; + + rng_init_per_cpu(rng, dn); + + ppc_md.get_random_seed = pnv_get_random_long; + + return 0; +} + +static int __init pnv_get_random_long_early(unsigned long *v) +{ + struct device_node *dn; + + if (!slab_is_available()) + return 0; + + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; + + for_each_compatible_node(dn, NULL, "ibm,power-rng") + rng_create(dn); + + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} + +void __init pnv_rng_init(void) +{ + struct device_node *dn; + + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + struct device_node *dn; + unsigned long v; + + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); + + if (ppc_md.get_random_seed == pnv_get_random_long) { + for_each_compatible_node(dn, NULL, "ibm,power-rng") + of_platform_device_create(dn, NULL, NULL); + } + + return 0; +} +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c new file mode 100644 index 0000000000..4dbb47ddbd --- /dev/null +++ b/arch/powerpc/platforms/powernv/setup.c @@ -0,0 +1,587 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerNV setup code. + * + * Copyright 2011 IBM Corp. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powernv.h" + + +static bool __init fw_feature_is(const char *state, const char *name, + struct device_node *fw_features) +{ + struct device_node *np; + bool rc = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + rc = of_property_read_bool(np, state); + of_node_put(np); + } + + return rc; +} + +static void __init init_fw_feat_flags(struct device_node *np) +{ + if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (fw_feature_is("enabled", "fw-bcctrl-serialized", np)) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (fw_feature_is("enabled", "fw-l1d-thread-split", np)) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np)) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np)) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. + */ + if (fw_feature_is("disabled", "speculation-policy-favor-security", np)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); + + if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); + + if (fw_feature_is("enabled", "no-need-l1d-flush-msr-pr-1-to-0", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (fw_feature_is("enabled", "no-need-l1d-flush-kernel-on-user-access", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (fw_feature_is("enabled", "no-need-store-drain-on-priv-state-switch", np)) + security_ftr_clear(SEC_FTR_STF_BARRIER); +} + +static void __init pnv_setup_security_mitigations(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + bool enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + init_fw_feat_flags(fw_features); + of_node_put(fw_features); + + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + type = L1D_FLUSH_MTTRIG; + + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) + type = L1D_FLUSH_ORI; + } + + /* + * The issues addressed by the entry and uaccess flush don't affect P7 + * or P8, so on bare metal disable them explicitly in case firmware does + * not include the features to disable them. POWER9 and newer processors + * should have the appropriate firmware flags. + */ + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) || + pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) || + pvr_version_is(PVR_POWER8)) { + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); + + setup_rfi_flush(type, enable); + setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); + + setup_stf_barrier(); +} + +static void __init pnv_check_guarded_cores(void) +{ + struct device_node *dn; + int bad_count = 0; + + for_each_node_by_type(dn, "cpu") { + if (of_property_match_string(dn, "status", "bad") >= 0) + bad_count++; + } + + if (bad_count) { + printk(" _ _______________\n"); + pr_cont(" | | / \\\n"); + pr_cont(" | | | WARNING! |\n"); + pr_cont(" | | | |\n"); + pr_cont(" | | | It looks like |\n"); + pr_cont(" |_| | you have %*d |\n", 3, bad_count); + pr_cont(" _ | guarded cores |\n"); + pr_cont(" (_) \\_______________/\n"); + } +} + +static void __init pnv_setup_arch(void) +{ + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + + pnv_setup_security_mitigations(); + + /* Initialize SMP */ + pnv_smp_init(); + + /* Setup RTC and NVRAM callbacks */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + opal_nvram_init(); + + /* Enable NAP mode */ + powersave_nap = 1; + + pnv_check_guarded_cores(); + + /* XXX PMCS */ + + pnv_rng_init(); +} + +static void __init pnv_add_hw_description(void) +{ + struct device_node *dn; + const char *s; + + dn = of_find_node_by_path("/ibm,opal/firmware"); + if (!dn) + return; + + if (of_property_read_string(dn, "version", &s) == 0 || + of_property_read_string(dn, "git-id", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "opal:%s ", s); + + if (of_property_read_string(dn, "mi-version", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "mi:%s ", s); + + of_node_put(dn); +} + +static void __init pnv_init(void) +{ + pnv_add_hw_description(); + + /* + * Initialize the LPC bus now so that legacy serial + * ports can be found on it + */ + opal_lpc_init(); + +#ifdef CONFIG_HVC_OPAL + if (firmware_has_feature(FW_FEATURE_OPAL)) + hvc_opal_init_early(); + else +#endif + add_preferred_console("hvc", 0, NULL); + +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; + int i; + + /* Allocate per cpu area to save old slb contents during MCE */ + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } + } +#endif +} + +static void __init pnv_init_IRQ(void) +{ + /* Try using a XIVE if available, otherwise use a XICS */ + if (!xive_native_init()) + xics_init(); + + WARN_ON(!ppc_md.get_irq); +} + +static void pnv_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: PowerNV %s\n", model); + if (firmware_has_feature(FW_FEATURE_OPAL)) + seq_printf(m, "firmware\t: OPAL\n"); + else + seq_printf(m, "firmware\t: BML\n"); + of_node_put(root); + if (radix_enabled()) + seq_printf(m, "MMU\t\t: Radix\n"); + else + seq_printf(m, "MMU\t\t: Hash\n"); +} + +static void pnv_prepare_going_down(void) +{ + /* + * Disable all notifiers from OPAL, we can't + * service interrupts anymore anyway + */ + opal_event_shutdown(); + + /* Print flash update message if one is scheduled. */ + opal_flash_update_print_message(); + + smp_send_stop(); + + hard_irq_disable(); +} + +static void __noreturn pnv_restart(char *cmd) +{ + long rc; + + pnv_prepare_going_down(); + + do { + if (!cmd || !strlen(cmd)) + rc = opal_cec_reboot(); + else if (strcmp(cmd, "full") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL); + else if (strcmp(cmd, "mpipl") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL); + else if (strcmp(cmd, "error") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL); + else if (strcmp(cmd, "fast") == 0) + rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL); + else + rc = OPAL_UNSUPPORTED; + + if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + /* Opal is busy wait for some time and retry */ + opal_poll_events(NULL); + mdelay(10); + + } else if (cmd && rc) { + /* Unknown error while issuing reboot */ + if (rc == OPAL_UNSUPPORTED) + pr_err("Unsupported '%s' reboot.\n", cmd); + else + pr_err("Unable to issue '%s' reboot. Err=%ld\n", + cmd, rc); + pr_info("Forcing a cec-reboot\n"); + cmd = NULL; + rc = OPAL_BUSY; + + } else if (rc != OPAL_SUCCESS) { + /* Unknown error while issuing cec-reboot */ + pr_err("Unable to reboot. Err=%ld\n", rc); + } + + } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT); + + for (;;) + opal_poll_events(NULL); +} + +static void __noreturn pnv_power_off(void) +{ + long rc = OPAL_BUSY; + + pnv_prepare_going_down(); + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_cec_power_down(0); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + mdelay(10); + } + for (;;) + opal_poll_events(NULL); +} + +static void __noreturn pnv_halt(void) +{ + pnv_power_off(); +} + +static void pnv_progress(char *s, unsigned short hex) +{ +} + +static void pnv_shutdown(void) +{ + /* Let the PCI code clear up IODA tables */ + pnv_pci_shutdown(); + + /* + * Stop OPAL activity: Unregister all OPAL interrupts so they + * don't fire up while we kexec and make sure all potentially + * DMA'ing ops are complete (such as dump retrieval). + */ + opal_shutdown(); +} + +#ifdef CONFIG_KEXEC_CORE +static void pnv_kexec_wait_secondaries_down(void) +{ + int my_cpu, i, notified = -1; + + my_cpu = get_cpu(); + + for_each_online_cpu(i) { + uint8_t status; + int64_t rc, timeout = 1000; + + if (i == my_cpu) + continue; + + for (;;) { + rc = opal_query_cpu_status(get_hard_smp_processor_id(i), + &status); + if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) + break; + barrier(); + if (i != notified) { + printk(KERN_INFO "kexec: waiting for cpu %d " + "(physical %d) to enter OPAL\n", + i, paca_ptrs[i]->hw_cpu_id); + notified = i; + } + + /* + * On crash secondaries might be unreachable or hung, + * so timeout if we've waited too long + * */ + mdelay(1); + if (timeout-- == 0) { + printk(KERN_ERR "kexec: timed out waiting for " + "cpu %d (physical %d) to enter OPAL\n", + i, paca_ptrs[i]->hw_cpu_id); + break; + } + } + } +} + +static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) +{ + u64 reinit_flags; + + if (xive_enabled()) + xive_teardown_cpu(); + else + xics_kexec_teardown_cpu(secondary); + + /* On OPAL, we return all CPUs to firmware */ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return; + + if (secondary) { + /* Return secondary CPUs to firmware on OPAL v3 */ + mb(); + get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; + mb(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); + } else { + /* Primary waits for the secondaries to have reached OPAL */ + pnv_kexec_wait_secondaries_down(); + + /* Switch XIVE back to emulation mode */ + if (xive_enabled()) + xive_shutdown(); + + /* + * We might be running as little-endian - now that interrupts + * are disabled, reset the HILE bit to big-endian so we don't + * take interrupts in the wrong endian later + * + * We reinit to enable both radix and hash on P9 to ensure + * the mode used by the next kernel is always supported. + */ + reinit_flags = OPAL_REINIT_CPUS_HILE_BE; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX | + OPAL_REINIT_CPUS_MMU_HASH; + opal_reinit_cpus(reinit_flags); + } +} +#endif /* CONFIG_KEXEC_CORE */ + +#ifdef CONFIG_MEMORY_HOTPLUG +static unsigned long pnv_memory_block_size(void) +{ + return memory_block_size; +} +#endif + +static void __init pnv_setup_machdep_opal(void) +{ + ppc_md.get_boot_time = opal_get_boot_time; + ppc_md.restart = pnv_restart; + pm_power_off = pnv_power_off; + ppc_md.halt = pnv_halt; + /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ + ppc_md.machine_check_exception = opal_machine_check; + ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; + if (opal_check_token(OPAL_HANDLE_HMI2)) + ppc_md.hmi_exception_early = opal_hmi_exception_early2; + else + ppc_md.hmi_exception_early = opal_hmi_exception_early; + ppc_md.handle_hmi_exception = opal_handle_hmi_exception; +} + +static int __init pnv_probe(void) +{ + if (firmware_has_feature(FW_FEATURE_OPAL)) + pnv_setup_machdep_opal(); + + pr_debug("PowerNV detected !\n"); + + pnv_init(); + + return 1; +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void __init pnv_tm_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL) || + !pvr_version_is(PVR_POWER9) || + early_cpu_has_feature(CPU_FTR_TM)) + return; + + if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS) + return; + + pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n"); + cur_cpu_spec->cpu_features |= CPU_FTR_TM; + /* Make sure "normal" HTM is off (it should be) */ + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM; + /* Turn on no suspend mode, and HTM no SC */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \ + PPC_FEATURE2_HTM_NOSC; + tm_suspend_disabled = true; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + +/* + * Returns the cpu frequency for 'cpu' in Hz. This is used by + * /proc/cpuinfo + */ +static unsigned long pnv_get_proc_freq(unsigned int cpu) +{ + unsigned long ret_freq; + + ret_freq = cpufreq_get(cpu) * 1000ul; + + /* + * If the backend cpufreq driver does not exist, + * then fallback to old way of reporting the clockrate. + */ + if (!ret_freq) + ret_freq = ppc_proc_freq; + return ret_freq; +} + +static long pnv_machine_check_early(struct pt_regs *regs) +{ + long handled = 0; + + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) + handled = cur_cpu_spec->machine_check_early(regs); + + return handled; +} + +define_machine(powernv) { + .name = "PowerNV", + .compatible = "ibm,powernv", + .probe = pnv_probe, + .setup_arch = pnv_setup_arch, + .init_IRQ = pnv_init_IRQ, + .show_cpuinfo = pnv_show_cpuinfo, + .get_proc_freq = pnv_get_proc_freq, + .discover_phbs = pnv_pci_init, + .progress = pnv_progress, + .machine_shutdown = pnv_shutdown, + .power_save = NULL, + .machine_check_early = pnv_machine_check_early, +#ifdef CONFIG_KEXEC_CORE + .kexec_cpu_down = pnv_kexec_cpu_down, +#endif +#ifdef CONFIG_MEMORY_HOTPLUG + .memory_block_size = pnv_memory_block_size, +#endif +}; diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c new file mode 100644 index 0000000000..9e1a25398f --- /dev/null +++ b/arch/powerpc/platforms/powernv/smp.c @@ -0,0 +1,441 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SMP support for PowerNV machines. + * + * Copyright 2011 IBM Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "powernv.h" + +#ifdef DEBUG +#include +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) do { } while (0) +#endif + +static void pnv_smp_setup_cpu(int cpu) +{ + /* + * P9 workaround for CI vector load (see traps.c), + * enable the corresponding HMI interrupt + */ + if (pvr_version_is(PVR_POWER9)) + mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17)); + + if (xive_enabled()) + xive_smp_setup_cpu(); + else if (cpu != boot_cpuid) + xics_setup_cpu(); +} + +static int pnv_smp_kick_cpu(int nr) +{ + unsigned int pcpu; + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); + long rc; + uint8_t status; + + if (nr < 0 || nr >= nr_cpu_ids) + return -EINVAL; + + pcpu = get_hard_smp_processor_id(nr); + /* + * If we already started or OPAL is not supported, we just + * kick the CPU via the PACA + */ + if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL)) + goto kick; + + /* + * At this point, the CPU can either be spinning on the way in + * from kexec or be inside OPAL waiting to be started for the + * first time. OPAL v3 allows us to query OPAL to know if it + * has the CPUs, so we do that + */ + rc = opal_query_cpu_status(pcpu, &status); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr); + return -ENODEV; + } + + /* + * Already started, just kick it, probably coming from + * kexec and spinning + */ + if (status == OPAL_THREAD_STARTED) + goto kick; + + /* + * Available/inactive, let's kick it + */ + if (status == OPAL_THREAD_INACTIVE) { + pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu); + rc = opal_start_cpu(pcpu, start_here); + if (rc != OPAL_SUCCESS) { + pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr); + return -ENODEV; + } + } else { + /* + * An unavailable CPU (or any other unknown status) + * shouldn't be started. It should also + * not be in the possible map but currently it can + * happen + */ + pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable" + " (status %d)...\n", nr, pcpu, status); + return -ENODEV; + } + +kick: + return smp_generic_kick_cpu(nr); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int pnv_smp_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* This is identical to pSeries... might consolidate by + * moving migrate_irqs_away to a ppc_md with default to + * the generic fixup_irqs. --BenH. + */ + set_cpu_online(cpu, false); + vdso_data->processorCount--; + if (cpu == boot_cpuid) + boot_cpuid = cpumask_any(cpu_online_mask); + if (xive_enabled()) + xive_smp_disable_cpu(); + else + xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + + return 0; +} + +static void pnv_flush_interrupts(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (xive_enabled()) + xive_flush_interrupt(); + else + icp_opal_flush_interrupt(); + } else { + icp_native_flush_interrupt(); + } +} + +static void pnv_cpu_offline_self(void) +{ + unsigned long srr1, unexpected_mask, wmask; + unsigned int cpu; + u64 lpcr_val; + + /* Standard hot unplug procedure */ + + idle_task_exit(); + cpu = smp_processor_id(); + DBG("CPU%d offline\n", cpu); + generic_set_cpu_dead(cpu); + smp_wmb(); + + wmask = SRR1_WAKEMASK; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + wmask = SRR1_WAKEMASK_P8; + + /* + * This turns the irq soft-disabled state we're called with, into a + * hard-disabled state with pending irq_happened interrupts cleared. + * + * PACA_IRQ_DEC - Decrementer should be ignored. + * PACA_IRQ_HMI - Can be ignored, processing is done in real mode. + * PACA_IRQ_DBELL, EE, PMI - Unexpected. + */ + hard_irq_disable(); + if (generic_check_cpu_restart(cpu)) + goto out; + + unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS); + if (local_paca->irq_happened & unexpected_mask) { + if (local_paca->irq_happened & PACA_IRQ_EE) + pnv_flush_interrupts(); + DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n", + cpu, local_paca->irq_happened); + } + local_paca->irq_happened = PACA_IRQ_HARD_DIS; + + /* + * We don't want to take decrementer interrupts while we are + * offline, so clear LPCR:PECE1. We keep PECE2 (and + * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in. + * + * If the CPU gets woken up by a special wakeup, ensure that + * the SLW engine sets LPCR with decrementer bit cleared, else + * the CPU will come back to the kernel due to a spurious + * wakeup. + */ + lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); + + while (!generic_check_cpu_restart(cpu)) { + /* + * Clear IPI flag, since we don't handle IPIs while + * offline, except for those when changing micro-threading + * mode, which are handled explicitly below, and those + * for coming online, which are handled via + * generic_check_cpu_restart() calls. + */ + kvmppc_clear_host_ipi(cpu); + + srr1 = pnv_cpu_offline(cpu); + + WARN_ON_ONCE(!irqs_disabled()); + WARN_ON(lazy_irq_pending()); + + /* + * If the SRR1 value indicates that we woke up due to + * an external interrupt, then clear the interrupt. + * We clear the interrupt before checking for the + * reason, so as to avoid a race where we wake up for + * some other reason, find nothing and clear the interrupt + * just as some other cpu is sending us an interrupt. + * If we returned from power7_nap as a result of + * having finished executing in a KVM guest, then srr1 + * contains 0. + */ + if (((srr1 & wmask) == SRR1_WAKEEE) || + ((srr1 & wmask) == SRR1_WAKEHVI)) { + pnv_flush_interrupts(); + } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + } else if ((srr1 & wmask) == SRR1_WAKERESET) { + irq_set_pending_from_srr1(srr1); + /* Does not return */ + } + + smp_mb(); + + /* + * For kdump kernels, we process the ipi and jump to + * crash_ipi_callback + */ + if (kdump_in_progress()) { + /* + * If we got to this point, we've not used + * NMI's, otherwise we would have gone + * via the SRR1_WAKERESET path. We are + * using regular IPI's for waking up offline + * threads. + */ + struct pt_regs regs; + + ppc_save_regs(®s); + crash_ipi_callback(®s); + /* Does not return */ + } + + if (cpu_core_split_required()) + continue; + + if (srr1 && !generic_check_cpu_restart(cpu)) + DBG("CPU%d Unexpected exit while offline srr1=%lx!\n", + cpu, srr1); + + } + + /* + * Re-enable decrementer interrupts in LPCR. + * + * Further, we want stop states to be woken up by decrementer + * for non-hotplug cases. So program the LPCR via stop api as + * well. + */ + lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; + pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); +out: + DBG("CPU%d coming online...\n", cpu); +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static int pnv_cpu_bootable(unsigned int nr) +{ + /* + * Starting with POWER8, the subcore logic relies on all threads of a + * core being booted so that they can participate in split mode + * switches. So on those machines we ignore the smt_enabled_at_boot + * setting (smt-enabled on the kernel command line). + */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return 1; + + return smp_generic_cpu_bootable(nr); +} + +static int pnv_smp_prepare_cpu(int cpu) +{ + if (xive_enabled()) + return xive_smp_prepare_cpu(cpu); + return 0; +} + +/* Cause IPI as setup by the interrupt controller (xics or xive) */ +static void (*ic_cause_ipi)(int cpu); + +static void pnv_cause_ipi(int cpu) +{ + if (doorbell_try_core_ipi(cpu)) + return; + + ic_cause_ipi(cpu); +} + +static void __init pnv_smp_probe(void) +{ + if (xive_enabled()) + xive_smp_probe(); + else + xics_smp_probe(); + + if (cpu_has_feature(CPU_FTR_DBELL)) { + ic_cause_ipi = smp_ops->cause_ipi; + WARN_ON(!ic_cause_ipi); + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + smp_ops->cause_ipi = doorbell_global_ipi; + else + smp_ops->cause_ipi = pnv_cause_ipi; + } +} + +noinstr static int pnv_system_reset_exception(struct pt_regs *regs) +{ + if (smp_handle_nmi_ipi(regs)) + return 1; + return 0; +} + +static int pnv_cause_nmi_ipi(int cpu) +{ + int64_t rc; + + if (cpu >= 0) { + int h = get_hard_smp_processor_id(cpu); + + if (opal_check_token(OPAL_QUIESCE)) + opal_quiesce(QUIESCE_HOLD, h); + + rc = opal_signal_system_reset(h); + + if (opal_check_token(OPAL_QUIESCE)) + opal_quiesce(QUIESCE_RESUME, h); + + if (rc != OPAL_SUCCESS) + return 0; + return 1; + + } else if (cpu == NMI_IPI_ALL_OTHERS) { + bool success = true; + int c; + + if (opal_check_token(OPAL_QUIESCE)) + opal_quiesce(QUIESCE_HOLD, -1); + + /* + * We do not use broadcasts (yet), because it's not clear + * exactly what semantics Linux wants or the firmware should + * provide. + */ + for_each_online_cpu(c) { + if (c == smp_processor_id()) + continue; + + rc = opal_signal_system_reset( + get_hard_smp_processor_id(c)); + if (rc != OPAL_SUCCESS) + success = false; + } + + if (opal_check_token(OPAL_QUIESCE)) + opal_quiesce(QUIESCE_RESUME, -1); + + if (success) + return 1; + + /* + * Caller will fall back to doorbells, which may pick + * up the remainders. + */ + } + + return 0; +} + +static struct smp_ops_t pnv_smp_ops = { + .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ + .cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */ + .cause_nmi_ipi = NULL, + .probe = pnv_smp_probe, + .prepare_cpu = pnv_smp_prepare_cpu, + .kick_cpu = pnv_smp_kick_cpu, + .setup_cpu = pnv_smp_setup_cpu, + .cpu_bootable = pnv_cpu_bootable, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = pnv_smp_cpu_disable, + .cpu_die = generic_cpu_die, + .cpu_offline_self = pnv_cpu_offline_self, +#endif /* CONFIG_HOTPLUG_CPU */ +}; + +/* This is called very early during platform setup_arch */ +void __init pnv_smp_init(void) +{ + if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) { + ppc_md.system_reset_exception = pnv_system_reset_exception; + pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi; + } + smp_ops = &pnv_smp_ops; + +#ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_KEXEC_CORE + crash_wake_offline = 1; +#endif +#endif +} diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S new file mode 100644 index 0000000000..e038f67617 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore-asm.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + */ + +#include +#include +#include + +#include "subcore.h" + + +_GLOBAL(split_core_secondary_loop) + /* + * r3 = u8 *state, used throughout the routine + * r4 = temp + * r5 = temp + * .. + * r12 = MSR + */ + mfmsr r12 + + /* Disable interrupts so SRR0/1 don't get trashed */ + li r4,0 + ori r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI + andc r4,r12,r4 + sync + mtmsrd r4 + + /* Switch to real mode and leave interrupts off */ + li r5, MSR_IR|MSR_DR + andc r5, r4, r5 + + LOAD_REG_ADDR(r4, real_mode) + + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + rfid + b . /* prevent speculative execution */ + +real_mode: + /* Grab values from unsplit SPRs */ + mfspr r6, SPRN_LDBAR + mfspr r7, SPRN_PMMAR + mfspr r8, SPRN_PMCR + mfspr r9, SPRN_RPR + mfspr r10, SPRN_SDR1 + + /* Order reading the SPRs vs telling the primary we are ready to split */ + sync + + /* Tell thread 0 we are in real mode */ + li r4, SYNC_STEP_REAL_MODE + stb r4, 0(r3) + + li r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest + sldi r5, r5, 48 + + /* Loop until we see the split happen in HID0 */ +1: mfspr r4, SPRN_HID0 + and. r4, r4, r5 + beq 1b + + /* + * We only need to initialise the below regs once for each subcore, + * but it's simpler and harmless to do it on each thread. + */ + + /* Make sure various SPRS have sane values */ + li r4, 0 + mtspr SPRN_LPID, r4 + mtspr SPRN_PCR, r4 + mtspr SPRN_HDEC, r4 + + /* Restore SPR values now we are split */ + mtspr SPRN_LDBAR, r6 + mtspr SPRN_PMMAR, r7 + mtspr SPRN_PMCR, r8 + mtspr SPRN_RPR, r9 + mtspr SPRN_SDR1, r10 + + LOAD_REG_ADDR(r5, virtual_mode) + + /* Get out of real mode */ + mtspr SPRN_SRR0,r5 + mtspr SPRN_SRR1,r12 + rfid + b . /* prevent speculative execution */ + +virtual_mode: + blr diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c new file mode 100644 index 0000000000..191424468f --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation. + */ + +#define pr_fmt(fmt) "powernv: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "subcore.h" +#include "powernv.h" + + +/* + * Split/unsplit procedure: + * + * A core can be in one of three states, unsplit, 2-way split, and 4-way split. + * + * The mapping to subcores_per_core is simple: + * + * State | subcores_per_core + * ------------|------------------ + * Unsplit | 1 + * 2-way split | 2 + * 4-way split | 4 + * + * The core is split along thread boundaries, the mapping between subcores and + * threads is as follows: + * + * Unsplit: + * ---------------------------- + * Subcore | 0 | + * ---------------------------- + * Thread | 0 1 2 3 4 5 6 7 | + * ---------------------------- + * + * 2-way split: + * ------------------------------------- + * Subcore | 0 | 1 | + * ------------------------------------- + * Thread | 0 1 2 3 | 4 5 6 7 | + * ------------------------------------- + * + * 4-way split: + * ----------------------------------------- + * Subcore | 0 | 1 | 2 | 3 | + * ----------------------------------------- + * Thread | 0 1 | 2 3 | 4 5 | 6 7 | + * ----------------------------------------- + * + * + * Transitions + * ----------- + * + * It is not possible to transition between either of the split states, the + * core must first be unsplit. The legal transitions are: + * + * ----------- --------------- + * | | <----> | 2-way split | + * | | --------------- + * | Unsplit | + * | | --------------- + * | | <----> | 4-way split | + * ----------- --------------- + * + * Unsplitting + * ----------- + * + * Unsplitting is the simpler procedure. It requires thread 0 to request the + * unsplit while all other threads NAP. + * + * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells + * the hardware that if all threads except 0 are napping, the hardware should + * unsplit the core. + * + * Non-zero threads are sent to a NAP loop, they don't exit the loop until they + * see the core unsplit. + * + * Core 0 spins waiting for the hardware to see all the other threads napping + * and perform the unsplit. + * + * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them + * out of NAP. They will then see the core unsplit and exit the NAP loop. + * + * Splitting + * --------- + * + * The basic splitting procedure is fairly straight forward. However it is + * complicated by the fact that after the split occurs, the newly created + * subcores are not in a fully initialised state. + * + * Most notably the subcores do not have the correct value for SDR1, which + * means they must not be running in virtual mode when the split occurs. The + * subcores have separate timebases SPRs but these are pre-synchronised by + * opal. + * + * To begin with secondary threads are sent to an assembly routine. There they + * switch to real mode, so they are immune to the uninitialised SDR1 value. + * Once in real mode they indicate that they are in real mode, and spin waiting + * to see the core split. + * + * Thread 0 waits to see that all secondaries are in real mode, and then begins + * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which + * prevents the hardware from unsplitting. Then it sets the appropriate HID bit + * to request the split, and spins waiting to see that the split has happened. + * + * Concurrently the secondaries will notice the split. When they do they set up + * their SPRs, notably SDR1, and then they can return to virtual mode and exit + * the procedure. + */ + +/* Initialised at boot by subcore_init() */ +static int subcores_per_core; + +/* + * Used to communicate to offline cpus that we want them to pop out of the + * offline loop and do a split or unsplit. + * + * 0 - no split happening + * 1 - unsplit in progress + * 2 - split to 2 in progress + * 4 - split to 4 in progress + */ +static int new_split_mode; + +static cpumask_var_t cpu_offline_mask; + +struct split_state { + u8 step; + u8 master; +}; + +static DEFINE_PER_CPU(struct split_state, split_state); + +static void wait_for_sync_step(int step) +{ + int i, cpu = smp_processor_id(); + + for (i = cpu + 1; i < cpu + threads_per_core; i++) + while(per_cpu(split_state, i).step < step) + barrier(); + + /* Order the wait loop vs any subsequent loads/stores. */ + mb(); +} + +static void update_hid_in_slw(u64 hid0) +{ + u64 idle_states = pnv_get_supported_cpuidle_states(); + + if (idle_states & OPAL_PM_WINKLE_ENABLED) { + /* OPAL call to patch slw with the new HID0 value */ + u64 cpu_pir = hard_smp_processor_id(); + + opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0); + } +} + +static inline void update_power8_hid0(unsigned long hid0) +{ + /* + * The HID0 update on Power8 should at the very least be + * preceded by a SYNC instruction followed by an ISYNC + * instruction + */ + asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); +} + +static void unsplit_core(void) +{ + u64 hid0, mask; + int i, cpu; + + mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; + + cpu = smp_processor_id(); + if (cpu_thread_in_core(cpu) != 0) { + while (mfspr(SPRN_HID0) & mask) + power7_idle_type(PNV_THREAD_NAP); + + per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; + return; + } + + hid0 = mfspr(SPRN_HID0); + hid0 &= ~HID0_POWER8_DYNLPARDIS; + update_power8_hid0(hid0); + update_hid_in_slw(hid0); + + while (mfspr(SPRN_HID0) & mask) + cpu_relax(); + + /* Wake secondaries out of NAP */ + for (i = cpu + 1; i < cpu + threads_per_core; i++) + smp_send_reschedule(i); + + wait_for_sync_step(SYNC_STEP_UNSPLIT); +} + +static void split_core(int new_mode) +{ + struct { u64 value; u64 mask; } split_parms[2] = { + { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE }, + { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE } + }; + int i, cpu; + u64 hid0; + + /* Convert new_mode (2 or 4) into an index into our parms array */ + i = (new_mode >> 1) - 1; + BUG_ON(i < 0 || i > 1); + + cpu = smp_processor_id(); + if (cpu_thread_in_core(cpu) != 0) { + split_core_secondary_loop(&per_cpu(split_state, cpu).step); + return; + } + + wait_for_sync_step(SYNC_STEP_REAL_MODE); + + /* Write new mode */ + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value; + update_power8_hid0(hid0); + update_hid_in_slw(hid0); + + /* Wait for it to happen */ + while (!(mfspr(SPRN_HID0) & split_parms[i].mask)) + cpu_relax(); +} + +static void cpu_do_split(int new_mode) +{ + /* + * At boot subcores_per_core will be 0, so we will always unsplit at + * boot. In the usual case where the core is already unsplit it's a + * nop, and this just ensures the kernel's notion of the mode is + * consistent with the hardware. + */ + if (subcores_per_core != 1) + unsplit_core(); + + if (new_mode != 1) + split_core(new_mode); + + mb(); + per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED; +} + +bool cpu_core_split_required(void) +{ + smp_rmb(); + + if (!new_split_mode) + return false; + + cpu_do_split(new_split_mode); + + return true; +} + +void update_subcore_sibling_mask(void) +{ + int cpu; + /* + * sibling mask for the first cpu. Left shift this by required bits + * to get sibling mask for the rest of the cpus. + */ + int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1; + + for_each_possible_cpu(cpu) { + int tid = cpu_thread_in_core(cpu); + int offset = (tid / threads_per_subcore) * threads_per_subcore; + int mask = sibling_mask_first_cpu << offset; + + paca_ptrs[cpu]->subcore_sibling_mask = mask; + + } +} + +static int cpu_update_split_mode(void *data) +{ + int cpu, new_mode = *(int *)data; + + if (this_cpu_ptr(&split_state)->master) { + new_split_mode = new_mode; + smp_wmb(); + + cpumask_andnot(cpu_offline_mask, cpu_present_mask, + cpu_online_mask); + + /* This should work even though the cpu is offline */ + for_each_cpu(cpu, cpu_offline_mask) + smp_send_reschedule(cpu); + } + + cpu_do_split(new_mode); + + if (this_cpu_ptr(&split_state)->master) { + /* Wait for all cpus to finish before we touch subcores_per_core */ + for_each_present_cpu(cpu) { + if (cpu >= setup_max_cpus) + break; + + while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED) + barrier(); + } + + new_split_mode = 0; + + /* Make the new mode public */ + subcores_per_core = new_mode; + threads_per_subcore = threads_per_core / subcores_per_core; + update_subcore_sibling_mask(); + + /* Make sure the new mode is written before we exit */ + mb(); + } + + return 0; +} + +static int set_subcores_per_core(int new_mode) +{ + struct split_state *state; + int cpu; + + if (kvm_hv_mode_active()) { + pr_err("Unable to change split core mode while KVM active.\n"); + return -EBUSY; + } + + /* + * We are only called at boot, or from the sysfs write. If that ever + * changes we'll need a lock here. + */ + BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3); + + for_each_present_cpu(cpu) { + state = &per_cpu(split_state, cpu); + state->step = SYNC_STEP_INITIAL; + state->master = 0; + } + + cpus_read_lock(); + + /* This cpu will update the globals before exiting stop machine */ + this_cpu_ptr(&split_state)->master = 1; + + /* Ensure state is consistent before we call the other cpus */ + mb(); + + stop_machine_cpuslocked(cpu_update_split_mode, &new_mode, + cpu_online_mask); + + cpus_read_unlock(); + + return 0; +} + +static ssize_t __used store_subcores_per_core(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int rc; + + /* We are serialised by the attribute lock */ + + rc = sscanf(buf, "%lx", &val); + if (rc != 1) + return -EINVAL; + + switch (val) { + case 1: + case 2: + case 4: + if (subcores_per_core == val) + /* Nothing to do */ + goto out; + break; + default: + return -EINVAL; + } + + rc = set_subcores_per_core(val); + if (rc) + return rc; + +out: + return count; +} + +static ssize_t show_subcores_per_core(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%x\n", subcores_per_core); +} + +static DEVICE_ATTR(subcores_per_core, 0644, + show_subcores_per_core, store_subcores_per_core); + +static int subcore_init(void) +{ + struct device *dev_root; + unsigned pvr_ver; + int rc = 0; + + pvr_ver = PVR_VER(mfspr(SPRN_PVR)); + + if (pvr_ver != PVR_POWER8 && + pvr_ver != PVR_POWER8E && + pvr_ver != PVR_POWER8NVL) + return 0; + + /* + * We need all threads in a core to be present to split/unsplit so + * continue only if max_cpus are aligned to threads_per_core. + */ + if (setup_max_cpus % threads_per_core) + return 0; + + BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL)); + + set_subcores_per_core(1); + + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + rc = device_create_file(dev_root, &dev_attr_subcores_per_core); + put_device(dev_root); + } + return rc; +} +machine_device_initcall(powernv, subcore_init); diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h new file mode 100644 index 0000000000..77feee8436 --- /dev/null +++ b/arch/powerpc/platforms/powernv/subcore.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +/* These are ordered and tested with <= */ +#define SYNC_STEP_INITIAL 0 +#define SYNC_STEP_UNSPLIT 1 /* Set by secondary when it sees unsplit */ +#define SYNC_STEP_REAL_MODE 2 /* Set by secondary when in real mode */ +#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP +void split_core_secondary_loop(u8 *state); +extern void update_subcore_sibling_mask(void); +#else +static inline void update_subcore_sibling_mask(void) { } +#endif /* CONFIG_SMP */ + +#endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c new file mode 100644 index 0000000000..67c8c4b2d8 --- /dev/null +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ultravisor high level interfaces + * + * Copyright 2019, IBM Corporation. + * + */ +#include +#include +#include +#include + +#include +#include +#include + +#include "powernv.h" + +static struct kobject *ultravisor_kobj; + +int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname, + int depth, void *data) +{ + if (!of_flat_dt_is_compatible(node, "ibm,ultravisor")) + return 0; + + powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR; + pr_debug("Ultravisor detected!\n"); + return 1; +} + +static struct memcons *uv_memcons; + +static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + return memcons_copy(uv_memcons, to, pos, count); +} + +static struct bin_attribute uv_msglog_attr = { + .attr = {.name = "msglog", .mode = 0400}, + .read = uv_msglog_read +}; + +static int __init uv_init(void) +{ + struct device_node *node; + + if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) + return 0; + + node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware"); + if (!node) + return -ENODEV; + + uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); + if (!uv_memcons) + return -ENOENT; + + uv_msglog_attr.size = memcons_get_size(uv_memcons); + + ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj); + if (!ultravisor_kobj) + return -ENOMEM; + + return sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr); +} +machine_subsys_initcall(powernv, uv_init); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c new file mode 100644 index 0000000000..3ce89a4b54 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2016-17 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include "vas.h" + +static struct dentry *vas_debugfs; + +static char *cop_to_str(int cop) +{ + switch (cop) { + case VAS_COP_TYPE_FAULT: return "Fault"; + case VAS_COP_TYPE_842: return "NX-842 Normal Priority"; + case VAS_COP_TYPE_842_HIPRI: return "NX-842 High Priority"; + case VAS_COP_TYPE_GZIP: return "NX-GZIP Normal Priority"; + case VAS_COP_TYPE_GZIP_HIPRI: return "NX-GZIP High Priority"; + case VAS_COP_TYPE_FTW: return "Fast Thread-wakeup"; + default: return "Unknown"; + } +} + +static int info_show(struct seq_file *s, void *private) +{ + struct pnv_vas_window *window = s->private; + + mutex_lock(&vas_mutex); + + /* ensure window is not unmapped */ + if (!window->hvwc_map) + goto unlock; + + seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop), + window->tx_win ? "Send" : "Receive"); + seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win)); + +unlock: + mutex_unlock(&vas_mutex); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(info); + +static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win, + char *name, u32 reg) +{ + seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name); +} + +static int hvwc_show(struct seq_file *s, void *private) +{ + struct pnv_vas_window *window = s->private; + + mutex_lock(&vas_mutex); + + /* ensure window is not unmapped */ + if (!window->hvwc_map) + goto unlock; + + print_reg(s, window, VREG(LPID)); + print_reg(s, window, VREG(PID)); + print_reg(s, window, VREG(XLATE_MSR)); + print_reg(s, window, VREG(XLATE_LPCR)); + print_reg(s, window, VREG(XLATE_CTL)); + print_reg(s, window, VREG(AMR)); + print_reg(s, window, VREG(SEIDR)); + print_reg(s, window, VREG(FAULT_TX_WIN)); + print_reg(s, window, VREG(OSU_INTR_SRC_RA)); + print_reg(s, window, VREG(HV_INTR_SRC_RA)); + print_reg(s, window, VREG(PSWID)); + print_reg(s, window, VREG(LFIFO_BAR)); + print_reg(s, window, VREG(LDATA_STAMP_CTL)); + print_reg(s, window, VREG(LDMA_CACHE_CTL)); + print_reg(s, window, VREG(LRFIFO_PUSH)); + print_reg(s, window, VREG(CURR_MSG_COUNT)); + print_reg(s, window, VREG(LNOTIFY_AFTER_COUNT)); + print_reg(s, window, VREG(LRX_WCRED)); + print_reg(s, window, VREG(LRX_WCRED_ADDER)); + print_reg(s, window, VREG(TX_WCRED)); + print_reg(s, window, VREG(TX_WCRED_ADDER)); + print_reg(s, window, VREG(LFIFO_SIZE)); + print_reg(s, window, VREG(WINCTL)); + print_reg(s, window, VREG(WIN_STATUS)); + print_reg(s, window, VREG(WIN_CTX_CACHING_CTL)); + print_reg(s, window, VREG(TX_RSVD_BUF_COUNT)); + print_reg(s, window, VREG(LRFIFO_WIN_PTR)); + print_reg(s, window, VREG(LNOTIFY_CTL)); + print_reg(s, window, VREG(LNOTIFY_PID)); + print_reg(s, window, VREG(LNOTIFY_LPID)); + print_reg(s, window, VREG(LNOTIFY_TID)); + print_reg(s, window, VREG(LNOTIFY_SCOPE)); + print_reg(s, window, VREG(NX_UTIL_ADDER)); +unlock: + mutex_unlock(&vas_mutex); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(hvwc); + +void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win) +{ + struct vas_window *window = &pnv_win->vas_win; + + if (window->dbgdir) { + debugfs_remove_recursive(window->dbgdir); + kfree(window->dbgname); + window->dbgdir = NULL; + window->dbgname = NULL; + } +} + +void vas_window_init_dbgdir(struct pnv_vas_window *window) +{ + struct dentry *d; + + if (!window->vinst->dbgdir) + return; + + window->vas_win.dbgname = kzalloc(16, GFP_KERNEL); + if (!window->vas_win.dbgname) + return; + + snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid); + + d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir); + window->vas_win.dbgdir = d; + + debugfs_create_file("info", 0444, d, window, &info_fops); + debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); +} + +void vas_instance_init_dbgdir(struct vas_instance *vinst) +{ + struct dentry *d; + + vas_init_dbgdir(); + + vinst->dbgname = kzalloc(16, GFP_KERNEL); + if (!vinst->dbgname) + return; + + snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id); + + d = debugfs_create_dir(vinst->dbgname, vas_debugfs); + vinst->dbgdir = d; +} + +/* + * Set up the "root" VAS debugfs dir. Return if we already set it up + * (or failed to) in an earlier instance of VAS. + */ +void vas_init_dbgdir(void) +{ + static bool first_time = true; + + if (!first_time) + return; + + first_time = false; + vas_debugfs = debugfs_create_dir("vas", NULL); +} diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c new file mode 100644 index 0000000000..2b47d5a863 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VAS Fault handling. + * Copyright 2019, IBM Corporation + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vas.h" + +/* + * The maximum FIFO size for fault window can be 8MB + * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS + * instance will be having fault window. + * 8MB FIFO can be used if expects more faults for each VAS + * instance. + */ +#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) + +static void dump_fifo(struct vas_instance *vinst, void *entry) +{ + unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; + unsigned long *fifo = entry; + int i; + + pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size, + vinst->fault_fifo_size / CRB_SIZE); + + /* Dump 10 CRB entries or until end of FIFO */ + pr_err("Fault FIFO Dump:\n"); + for (i = 0; i < 10*(CRB_SIZE/8) && fifo < end; i += 4, fifo += 4) { + pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n", + i, fifo, *fifo, *(fifo+1), *(fifo+2), *(fifo+3)); + } +} + +/* + * Process valid CRBs in fault FIFO. + * NX process user space requests, return credit and update the status + * in CRB. If it encounters transalation error when accessing CRB or + * request buffers, raises interrupt on the CPU to handle the fault. + * It takes credit on fault window, updates nx_fault_stamp in CRB with + * the following information and pastes CRB in fault FIFO. + * + * pswid - window ID of the window on which the request is sent. + * fault_storage_addr - fault address + * + * It can raise a single interrupt for multiple faults. Expects OS to + * process all valid faults and return credit for each fault on user + * space and fault windows. This fault FIFO control will be done with + * credit mechanism. NX can continuously paste CRBs until credits are not + * available on fault window. Otherwise, returns with RMA_reject. + * + * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128) + * + */ +irqreturn_t vas_fault_thread_fn(int irq, void *data) +{ + struct vas_instance *vinst = data; + struct coprocessor_request_block *crb, *entry; + struct coprocessor_request_block buf; + struct pnv_vas_window *window; + unsigned long flags; + void *fifo; + + crb = &buf; + + /* + * VAS can interrupt with multiple page faults. So process all + * valid CRBs within fault FIFO until reaches invalid CRB. + * We use CCW[0] and pswid to validate CRBs: + * + * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0 + * OS sets this bit to 1 after reading CRB. + * pswid NX assigns window ID. Set pswid to -1 after + * reading CRB from fault FIFO. + * + * We exit this function if no valid CRBs are available to process. + * So acquire fault_lock and reset fifo_in_progress to 0 before + * exit. + * In case kernel receives another interrupt with different page + * fault, interrupt handler returns with IRQ_HANDLED if + * fifo_in_progress is set. Means these new faults will be + * handled by the current thread. Otherwise set fifo_in_progress + * and return IRQ_WAKE_THREAD to wake up thread. + */ + while (true) { + spin_lock_irqsave(&vinst->fault_lock, flags); + /* + * Advance the fault fifo pointer to next CRB. + * Use CRB_SIZE rather than sizeof(*crb) since the latter is + * aligned to CRB_ALIGN (256) but the CRB written to by VAS is + * only CRB_SIZE in len. + */ + fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE); + entry = fifo; + + if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY)) + || (entry->ccw & cpu_to_be32(CCW0_INVALID))) { + vinst->fifo_in_progress = 0; + spin_unlock_irqrestore(&vinst->fault_lock, flags); + return IRQ_HANDLED; + } + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + vinst->fault_crbs++; + if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE)) + vinst->fault_crbs = 0; + + memcpy(crb, fifo, CRB_SIZE); + entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY); + entry->ccw |= cpu_to_be32(CCW0_INVALID); + /* + * Return credit for the fault window. + */ + vas_return_credit(vinst->fault_win, false); + + pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n", + vinst->vas_id, vinst->fault_fifo, fifo, + vinst->fault_crbs); + + vas_dump_crb(crb); + window = vas_pswid_to_window(vinst, + be32_to_cpu(crb->stamp.nx.pswid)); + + if (IS_ERR(window)) { + /* + * We got an interrupt about a specific send + * window but we can't find that window and we can't + * even clean it up (return credit on user space + * window). + * But we should not get here. + * TODO: Disable IRQ. + */ + dump_fifo(vinst, (void *)entry); + pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n", + vinst->vas_id, vinst->fault_fifo, fifo, + be32_to_cpu(crb->stamp.nx.pswid), + vinst->fault_crbs); + + WARN_ON_ONCE(1); + } else { + /* + * NX sees faults only with user space windows. + */ + if (window->user_win) + vas_update_csb(crb, &window->vas_win.task_ref); + else + WARN_ON_ONCE(!window->user_win); + + /* + * Return credit for send window after processing + * fault CRB. + */ + vas_return_credit(window, true); + } + } +} + +irqreturn_t vas_fault_handler(int irq, void *dev_id) +{ + struct vas_instance *vinst = dev_id; + irqreturn_t ret = IRQ_WAKE_THREAD; + unsigned long flags; + + /* + * NX can generate an interrupt for multiple faults. So the + * fault handler thread process all CRBs until finds invalid + * entry. In case if NX sees continuous faults, it is possible + * that the thread function entered with the first interrupt + * can execute and process all valid CRBs. + * So wake up thread only if the fault thread is not in progress. + */ + spin_lock_irqsave(&vinst->fault_lock, flags); + + if (vinst->fifo_in_progress) + ret = IRQ_HANDLED; + else + vinst->fifo_in_progress = 1; + + spin_unlock_irqrestore(&vinst->fault_lock, flags); + + return ret; +} + +/* + * Fault window is opened per VAS instance. NX pastes fault CRB in fault + * FIFO upon page faults. + */ +int vas_setup_fault_window(struct vas_instance *vinst) +{ + struct vas_rx_win_attr attr; + struct vas_window *win; + + vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; + vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); + if (!vinst->fault_fifo) { + pr_err("Unable to alloc %d bytes for fault_fifo\n", + vinst->fault_fifo_size); + return -ENOMEM; + } + + /* + * Invalidate all CRB entries. NX pastes valid entry for each fault. + */ + memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size); + vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); + + attr.rx_fifo_size = vinst->fault_fifo_size; + attr.rx_fifo = __pa(vinst->fault_fifo); + + /* + * Max creds is based on number of CRBs can fit in the FIFO. + * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds + * will be 0xffff since the receive creds field is 16bits wide. + */ + attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE; + attr.lnotify_lpid = 0; + attr.lnotify_pid = mfspr(SPRN_PID); + attr.lnotify_tid = mfspr(SPRN_PID); + + win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr); + if (IS_ERR(win)) { + pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win)); + kfree(vinst->fault_fifo); + return PTR_ERR(win); + } + + vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win); + + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", + vinst->fault_win->vas_win.winid, attr.lnotify_lpid, + attr.lnotify_pid, attr.lnotify_tid); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h new file mode 100644 index 0000000000..ca2e08f2dd --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vas + +#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _VAS_TRACE_H +#include +#include +#include + +TRACE_EVENT( vas_rx_win_open, + + TP_PROTO(struct task_struct *tsk, + int vasid, + int cop, + struct vas_rx_win_attr *rxattr), + + TP_ARGS(tsk, vasid, cop, rxattr), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(int, pid) + __field(int, cop) + __field(int, vasid) + __field(struct vas_rx_win_attr *, rxattr) + __field(int, lnotify_lpid) + __field(int, lnotify_pid) + __field(int, lnotify_tid) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = vasid; + __entry->cop = cop; + __entry->lnotify_lpid = rxattr->lnotify_lpid; + __entry->lnotify_pid = rxattr->lnotify_pid; + __entry->lnotify_tid = rxattr->lnotify_tid; + ), + + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d", + __entry->pid, __entry->vasid, __entry->cop, + __entry->lnotify_lpid, __entry->lnotify_pid, + __entry->lnotify_tid) +); + +TRACE_EVENT( vas_tx_win_open, + + TP_PROTO(struct task_struct *tsk, + int vasid, + int cop, + struct vas_tx_win_attr *txattr), + + TP_ARGS(tsk, vasid, cop, txattr), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(int, pid) + __field(int, cop) + __field(int, vasid) + __field(struct vas_tx_win_attr *, txattr) + __field(int, lpid) + __field(int, pidr) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = vasid; + __entry->cop = cop; + __entry->lpid = txattr->lpid; + __entry->pidr = txattr->pidr; + ), + + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d", + __entry->pid, __entry->vasid, __entry->cop, + __entry->lpid, __entry->pidr) +); + +TRACE_EVENT( vas_paste_crb, + + TP_PROTO(struct task_struct *tsk, + struct pnv_vas_window *win), + + TP_ARGS(tsk, win), + + TP_STRUCT__entry( + __field(struct task_struct *, tsk) + __field(struct vas_window *, win) + __field(int, pid) + __field(int, vasid) + __field(int, winid) + __field(unsigned long, paste_kaddr) + ), + + TP_fast_assign( + __entry->pid = tsk->pid; + __entry->vasid = win->vinst->vas_id; + __entry->winid = win->vas_win.winid; + __entry->paste_kaddr = (unsigned long)win->paste_kaddr + ), + + TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n", + __entry->pid, __entry->vasid, __entry->winid, + __entry->paste_kaddr) +); + +#endif /* _VAS_TRACE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv +#define TRACE_INCLUDE_FILE vas-trace +#include diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c new file mode 100644 index 0000000000..b664838008 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -0,0 +1,1471 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2016-17 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vas.h" +#include "copy-paste.h" + +#define CREATE_TRACE_POINTS +#include "vas-trace.h" + +/* + * Compute the paste address region for the window @window using the + * ->paste_base_addr and ->paste_win_id_shift we got from device tree. + */ +void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len) +{ + int winid; + u64 base, shift; + + base = window->vinst->paste_base_addr; + shift = window->vinst->paste_win_id_shift; + winid = window->vas_win.winid; + + *addr = base + (winid << shift); + if (len) + *len = PAGE_SIZE; + + pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); +} + +static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window, + u64 *start, int *len) +{ + u64 pbaddr; + + pbaddr = window->vinst->hvwc_bar_start; + *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE; + *len = VAS_HVWC_SIZE; +} + +static inline void get_uwc_mmio_bar(struct pnv_vas_window *window, + u64 *start, int *len) +{ + u64 pbaddr; + + pbaddr = window->vinst->uwc_bar_start; + *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE; + *len = VAS_UWC_SIZE; +} + +/* + * Map the paste bus address of the given send window into kernel address + * space. Unlike MMIO regions (map_mmio_region() below), paste region must + * be mapped cache-able and is only applicable to send windows. + */ +static void *map_paste_region(struct pnv_vas_window *txwin) +{ + int len; + void *map; + char *name; + u64 start; + + name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id, + txwin->vas_win.winid); + if (!name) + goto free_name; + + txwin->paste_addr_name = name; + vas_win_paste_addr(txwin, &start, &len); + + if (!request_mem_region(start, len, name)) { + pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", + __func__, start, len); + goto free_name; + } + + map = ioremap_cache(start, len); + if (!map) { + pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__, + start, len); + goto free_name; + } + + pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map); + return map; + +free_name: + kfree(name); + return ERR_PTR(-ENOMEM); +} + +static void *map_mmio_region(char *name, u64 start, int len) +{ + void *map; + + if (!request_mem_region(start, len, name)) { + pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n", + __func__, start, len); + return NULL; + } + + map = ioremap(start, len); + if (!map) { + pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start, + len); + return NULL; + } + + return map; +} + +static void unmap_region(void *addr, u64 start, int len) +{ + iounmap(addr); + release_mem_region((phys_addr_t)start, len); +} + +/* + * Unmap the paste address region for a window. + */ +static void unmap_paste_region(struct pnv_vas_window *window) +{ + int len; + u64 busaddr_start; + + if (window->paste_kaddr) { + vas_win_paste_addr(window, &busaddr_start, &len); + unmap_region(window->paste_kaddr, busaddr_start, len); + window->paste_kaddr = NULL; + kfree(window->paste_addr_name); + window->paste_addr_name = NULL; + } +} + +/* + * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't + * unmap when the window's debugfs dir is in use. This serializes close + * of a window even on another VAS instance but since its not a critical + * path, just minimize the time we hold the mutex for now. We can add + * a per-instance mutex later if necessary. + */ +static void unmap_winctx_mmio_bars(struct pnv_vas_window *window) +{ + int len; + void *uwc_map; + void *hvwc_map; + u64 busaddr_start; + + mutex_lock(&vas_mutex); + + hvwc_map = window->hvwc_map; + window->hvwc_map = NULL; + + uwc_map = window->uwc_map; + window->uwc_map = NULL; + + mutex_unlock(&vas_mutex); + + if (hvwc_map) { + get_hvwc_mmio_bar(window, &busaddr_start, &len); + unmap_region(hvwc_map, busaddr_start, len); + } + + if (uwc_map) { + get_uwc_mmio_bar(window, &busaddr_start, &len); + unmap_region(uwc_map, busaddr_start, len); + } +} + +/* + * Find the Hypervisor Window Context (HVWC) MMIO Base Address Region and the + * OS/User Window Context (UWC) MMIO Base Address Region for the given window. + * Map these bus addresses and save the mapped kernel addresses in @window. + */ +static int map_winctx_mmio_bars(struct pnv_vas_window *window) +{ + int len; + u64 start; + + get_hvwc_mmio_bar(window, &start, &len); + window->hvwc_map = map_mmio_region("HVWCM_Window", start, len); + + get_uwc_mmio_bar(window, &start, &len); + window->uwc_map = map_mmio_region("UWCM_Window", start, len); + + if (!window->hvwc_map || !window->uwc_map) { + unmap_winctx_mmio_bars(window); + return -1; + } + + return 0; +} + +/* + * Reset all valid registers in the HV and OS/User Window Contexts for + * the window identified by @window. + * + * NOTE: We cannot really use a for loop to reset window context. Not all + * offsets in a window context are valid registers and the valid + * registers are not sequential. And, we can only write to offsets + * with valid registers. + */ +static void reset_window_regs(struct pnv_vas_window *window) +{ + write_hvwc_reg(window, VREG(LPID), 0ULL); + write_hvwc_reg(window, VREG(PID), 0ULL); + write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL); + write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL); + write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL); + write_hvwc_reg(window, VREG(AMR), 0ULL); + write_hvwc_reg(window, VREG(SEIDR), 0ULL); + write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL); + write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL); + write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL); + write_hvwc_reg(window, VREG(PSWID), 0ULL); + write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL); + write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL); + write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL); + write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL); + write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL); + write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL); + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL); + write_hvwc_reg(window, VREG(TX_WCRED), 0ULL); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL); + write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL); + write_hvwc_reg(window, VREG(WINCTL), 0ULL); + write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL); + write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL); + write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL); + write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL); + write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL); + + /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */ + + /* + * The send and receive window credit adder registers are also + * accessible from HVWC and have been initialized above. We don't + * need to initialize from the OS/User Window Context, so skip + * following calls: + * + * write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL); + * write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL); + */ +} + +/* + * Initialize window context registers related to Address Translation. + * These registers are common to send/receive windows although they + * differ for user/kernel windows. As we resolve the TODOs we may + * want to add fields to vas_winctx and move the initialization to + * init_vas_winctx_regs(). + */ +static void init_xlate_regs(struct pnv_vas_window *window, bool user_win) +{ + u64 lpcr, val; + + /* + * MSR_TA, MSR_US are false for both kernel and user. + * MSR_DR and MSR_PR are false for kernel. + */ + val = 0ULL; + val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1); + val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1); + if (user_win) { + val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1); + val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1); + } + write_hvwc_reg(window, VREG(XLATE_MSR), val); + + lpcr = mfspr(SPRN_LPCR); + val = 0ULL; + /* + * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the + * Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB. + * + * NOTE: From Section 1.3.1, Address Translation Context of the + * Nest MMU Workbook, LPCR_SC should be 0 for Power9. + */ + val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5); + val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL); + val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC); + val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0); + write_hvwc_reg(window, VREG(XLATE_LPCR), val); + + /* + * Section 1.3.1 (Address translation Context) of NMMU workbook. + * 0b00 Hashed Page Table mode + * 0b01 Reserved + * 0b10 Radix on HPT + * 0b11 Radix on Radix + */ + val = 0ULL; + val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2); + write_hvwc_reg(window, VREG(XLATE_CTL), val); + + /* + * TODO: Can we mfspr(AMR) even for user windows? + */ + val = 0ULL; + val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR)); + write_hvwc_reg(window, VREG(AMR), val); + + val = 0ULL; + val = SET_FIELD(VAS_SEIDR, val, 0); + write_hvwc_reg(window, VREG(SEIDR), val); +} + +/* + * Initialize Reserved Send Buffer Count for the send window. It involves + * writing to the register, reading it back to confirm that the hardware + * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook. + * + * Since we can only make a best-effort attempt to fulfill the request, + * we don't return any errors if we cannot. + * + * TODO: Reserved (aka dedicated) send buffers are not supported yet. + */ +static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin, + struct vas_winctx *winctx) +{ + write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL); +} + +/* + * init_winctx_regs() + * Initialize window context registers for a receive window. + * Except for caching control and marking window open, the registers + * are initialized in the order listed in Section 3.1.4 (Window Context + * Cache Register Details) of the VAS workbook although they don't need + * to be. + * + * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL + * (so that it can get a large contiguous area) and passes that buffer + * to kernel via device tree. We now write that buffer address to the + * FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL + * write the per-chip RX FIFO addresses to the windows during boot-up + * as a one-time task? That could work for NX but what about other + * receivers? Let the receivers tell us the rx-fifo buffers for now. + */ +static void init_winctx_regs(struct pnv_vas_window *window, + struct vas_winctx *winctx) +{ + u64 val; + int fifo_size; + + reset_window_regs(window); + + val = 0ULL; + val = SET_FIELD(VAS_LPID, val, winctx->lpid); + write_hvwc_reg(window, VREG(LPID), val); + + val = 0ULL; + val = SET_FIELD(VAS_PID_ID, val, winctx->pidr); + write_hvwc_reg(window, VREG(PID), val); + + init_xlate_regs(window, winctx->user_win); + + val = 0ULL; + val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id); + write_hvwc_reg(window, VREG(FAULT_TX_WIN), val); + + /* In PowerNV, interrupts go to HV. */ + write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL); + + val = 0ULL; + val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port); + write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val); + + val = 0ULL; + val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid); + write_hvwc_reg(window, VREG(PSWID), val); + + write_hvwc_reg(window, VREG(SPARE1), 0ULL); + write_hvwc_reg(window, VREG(SPARE2), 0ULL); + write_hvwc_reg(window, VREG(SPARE3), 0ULL); + + /* + * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR + * register as is - do NOT shift the address into VAS_LFIFO_BAR + * bit fields! Ok to set the page migration select fields - + * VAS ignores the lower 10+ bits in the address anyway, because + * the minimum FIFO size is 1K? + * + * See also: Design note in function header. + */ + val = winctx->rx_fifo; + val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0); + write_hvwc_reg(window, VREG(LFIFO_BAR), val); + + val = 0ULL; + val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp); + write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val); + + val = 0ULL; + val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type); + val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable); + write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val); + + write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL); + write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL); + write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL); + + val = 0ULL; + val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max); + write_hvwc_reg(window, VREG(LRX_WCRED), val); + + val = 0ULL; + val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max); + write_hvwc_reg(window, VREG(TX_WCRED), val); + + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL); + + fifo_size = winctx->rx_fifo_size / 1024; + + val = 0ULL; + val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size)); + write_hvwc_reg(window, VREG(LFIFO_SIZE), val); + + /* Update window control and caching control registers last so + * we mark the window open only after fully initializing it and + * pushing context to cache. + */ + + write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL); + + init_rsvd_tx_buf_count(window, winctx); + + /* for a send window, point to the matching receive window */ + val = 0ULL; + val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id); + write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val); + + write_hvwc_reg(window, VREG(SPARE4), 0ULL); + + val = 0ULL; + val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable); + val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable); + val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early); + val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg); + write_hvwc_reg(window, VREG(LNOTIFY_CTL), val); + + val = 0ULL; + val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid); + write_hvwc_reg(window, VREG(LNOTIFY_PID), val); + + val = 0ULL; + val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid); + write_hvwc_reg(window, VREG(LNOTIFY_LPID), val); + + val = 0ULL; + val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid); + write_hvwc_reg(window, VREG(LNOTIFY_TID), val); + + val = 0ULL; + val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope); + val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope); + write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val); + + /* Skip read-only registers NX_UTIL and NX_UTIL_SE */ + + write_hvwc_reg(window, VREG(SPARE5), 0ULL); + write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL); + write_hvwc_reg(window, VREG(SPARE6), 0ULL); + + /* Finally, push window context to memory and... */ + val = 0ULL; + val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1); + write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val); + + /* ... mark the window open for business */ + val = 0ULL; + val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit); + val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win); + val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode); + val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode); + val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode); + val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode); + val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win); + val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win); + val = SET_FIELD(VAS_WINCTL_OPEN, val, 1); + write_hvwc_reg(window, VREG(WINCTL), val); +} + +static void vas_release_window_id(struct ida *ida, int winid) +{ + ida_free(ida, winid); +} + +static int vas_assign_window_id(struct ida *ida) +{ + int winid = ida_alloc_max(ida, VAS_WINDOWS_PER_CHIP - 1, GFP_KERNEL); + + if (winid == -ENOSPC) { + pr_err("Too many (%d) open windows\n", VAS_WINDOWS_PER_CHIP); + return -EAGAIN; + } + + return winid; +} + +static void vas_window_free(struct pnv_vas_window *window) +{ + struct vas_instance *vinst = window->vinst; + int winid = window->vas_win.winid; + + unmap_winctx_mmio_bars(window); + + vas_window_free_dbgdir(window); + + kfree(window); + + vas_release_window_id(&vinst->ida, winid); +} + +static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst) +{ + int winid; + struct pnv_vas_window *window; + + winid = vas_assign_window_id(&vinst->ida); + if (winid < 0) + return ERR_PTR(winid); + + window = kzalloc(sizeof(*window), GFP_KERNEL); + if (!window) + goto out_free; + + window->vinst = vinst; + window->vas_win.winid = winid; + + if (map_winctx_mmio_bars(window)) + goto out_free; + + vas_window_init_dbgdir(window); + + return window; + +out_free: + kfree(window); + vas_release_window_id(&vinst->ida, winid); + return ERR_PTR(-ENOMEM); +} + +static void put_rx_win(struct pnv_vas_window *rxwin) +{ + /* Better not be a send window! */ + WARN_ON_ONCE(rxwin->tx_win); + + atomic_dec(&rxwin->num_txwins); +} + +/* + * Find the user space receive window given the @pswid. + * - We must have a valid vasid and it must belong to this instance. + * (so both send and receive windows are on the same VAS instance) + * - The window must refer to an OPEN, FTW, RECEIVE window. + * + * NOTE: We access ->windows[] table and assume that vinst->mutex is held. + */ +static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst, + u32 pswid) +{ + int vasid, winid; + struct pnv_vas_window *rxwin; + + decode_pswid(pswid, &vasid, &winid); + + if (vinst->vas_id != vasid) + return ERR_PTR(-EINVAL); + + rxwin = vinst->windows[winid]; + + if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW) + return ERR_PTR(-EINVAL); + + return rxwin; +} + +/* + * Get the VAS receive window associated with NX engine identified + * by @cop and if applicable, @pswid. + * + * See also function header of set_vinst_win(). + */ +static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst, + enum vas_cop_type cop, u32 pswid) +{ + struct pnv_vas_window *rxwin; + + mutex_lock(&vinst->mutex); + + if (cop == VAS_COP_TYPE_FTW) + rxwin = get_user_rxwin(vinst, pswid); + else + rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL); + + if (!IS_ERR(rxwin)) + atomic_inc(&rxwin->num_txwins); + + mutex_unlock(&vinst->mutex); + + return rxwin; +} + +/* + * We have two tables of windows in a VAS instance. The first one, + * ->windows[], contains all the windows in the instance and allows + * looking up a window by its id. It is used to look up send windows + * during fault handling and receive windows when pairing user space + * send/receive windows. + * + * The second table, ->rxwin[], contains receive windows that are + * associated with NX engines. This table has VAS_COP_TYPE_MAX + * entries and is used to look up a receive window by its + * coprocessor type. + * + * Here, we save @window in the ->windows[] table. If it is a receive + * window, we also save the window in the ->rxwin[] table. + */ +static void set_vinst_win(struct vas_instance *vinst, + struct pnv_vas_window *window) +{ + int id = window->vas_win.winid; + + mutex_lock(&vinst->mutex); + + /* + * There should only be one receive window for a coprocessor type + * unless its a user (FTW) window. + */ + if (!window->user_win && !window->tx_win) { + WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = window; + } + + WARN_ON_ONCE(vinst->windows[id] != NULL); + vinst->windows[id] = window; + + mutex_unlock(&vinst->mutex); +} + +/* + * Clear this window from the table(s) of windows for this VAS instance. + * See also function header of set_vinst_win(). + */ +static void clear_vinst_win(struct pnv_vas_window *window) +{ + int id = window->vas_win.winid; + struct vas_instance *vinst = window->vinst; + + mutex_lock(&vinst->mutex); + + if (!window->user_win && !window->tx_win) { + WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = NULL; + } + + WARN_ON_ONCE(vinst->windows[id] != window); + vinst->windows[id] = NULL; + + mutex_unlock(&vinst->mutex); +} + +static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, + struct vas_rx_win_attr *rxattr, + struct vas_winctx *winctx) +{ + /* + * We first zero (memset()) all fields and only set non-zero fields. + * Following fields are 0/false but maybe deserve a comment: + * + * ->notify_os_intr_reg In powerNV, send intrs to HV + * ->notify_disable False for NX windows + * ->intr_disable False for Fault Windows + * ->xtra_write False for NX windows + * ->notify_early NA for NX windows + * ->rsvd_txbuf_count NA for Rx windows + * ->lpid, ->pid, ->tid NA for Rx windows + */ + + memset(winctx, 0, sizeof(struct vas_winctx)); + + winctx->rx_fifo = rxattr->rx_fifo; + winctx->rx_fifo_size = rxattr->rx_fifo_size; + winctx->wcreds_max = rxwin->vas_win.wcreds_max; + winctx->pin_win = rxattr->pin_win; + + winctx->nx_win = rxattr->nx_win; + winctx->fault_win = rxattr->fault_win; + winctx->user_win = rxattr->user_win; + winctx->rej_no_credit = rxattr->rej_no_credit; + winctx->rx_word_mode = rxattr->rx_win_ord_mode; + winctx->tx_word_mode = rxattr->tx_win_ord_mode; + winctx->rx_wcred_mode = rxattr->rx_wcred_mode; + winctx->tx_wcred_mode = rxattr->tx_wcred_mode; + winctx->notify_early = rxattr->notify_early; + + if (winctx->nx_win) { + winctx->data_stamp = true; + winctx->intr_disable = true; + winctx->pin_win = true; + + WARN_ON_ONCE(winctx->fault_win); + WARN_ON_ONCE(!winctx->rx_word_mode); + WARN_ON_ONCE(!winctx->tx_word_mode); + WARN_ON_ONCE(winctx->notify_after_count); + } else if (winctx->fault_win) { + winctx->notify_disable = true; + } else if (winctx->user_win) { + /* + * Section 1.8.1 Low Latency Core-Core Wake up of + * the VAS workbook: + * + * - disable credit checks ([tr]x_wcred_mode = false) + * - disable FIFO writes + * - enable ASB_Notify, disable interrupt + */ + winctx->fifo_disable = true; + winctx->intr_disable = true; + winctx->rx_fifo = 0; + } + + winctx->lnotify_lpid = rxattr->lnotify_lpid; + winctx->lnotify_pid = rxattr->lnotify_pid; + winctx->lnotify_tid = rxattr->lnotify_tid; + winctx->pswid = rxattr->pswid; + winctx->dma_type = VAS_DMA_TYPE_INJECT; + winctx->tc_mode = rxattr->tc_mode; + + winctx->min_scope = VAS_SCOPE_LOCAL; + winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (rxwin->vinst->virq) + winctx->irq_port = rxwin->vinst->irq_port; +} + +static bool rx_win_args_valid(enum vas_cop_type cop, + struct vas_rx_win_attr *attr) +{ + pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n", + attr->fault_win, attr->notify_disable, + attr->intr_disable, attr->notify_early, + attr->rx_fifo_size); + + if (cop >= VAS_COP_TYPE_MAX) + return false; + + if (cop != VAS_COP_TYPE_FTW && + attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN) + return false; + + if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX) + return false; + + if (!attr->wcreds_max) + return false; + + if (attr->nx_win) { + /* cannot be fault or user window if it is nx */ + if (attr->fault_win || attr->user_win) + return false; + /* + * Section 3.1.4.32: NX Windows must not disable notification, + * and must not enable interrupts or early notification. + */ + if (attr->notify_disable || !attr->intr_disable || + attr->notify_early) + return false; + } else if (attr->fault_win) { + /* cannot be both fault and user window */ + if (attr->user_win) + return false; + + /* + * Section 3.1.4.32: Fault windows must disable notification + * but not interrupts. + */ + if (!attr->notify_disable || attr->intr_disable) + return false; + + } else if (attr->user_win) { + /* + * User receive windows are only for fast-thread-wakeup + * (FTW). They don't need a FIFO and must disable interrupts + */ + if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable) + return false; + } else { + /* Rx window must be one of NX or Fault or User window. */ + return false; + } + + return true; +} + +void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop) +{ + memset(rxattr, 0, sizeof(*rxattr)); + + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { + rxattr->pin_win = true; + rxattr->nx_win = true; + rxattr->fault_win = false; + rxattr->intr_disable = true; + rxattr->rx_wcred_mode = true; + rxattr->tx_wcred_mode = true; + rxattr->rx_win_ord_mode = true; + rxattr->tx_win_ord_mode = true; + } else if (cop == VAS_COP_TYPE_FAULT) { + rxattr->pin_win = true; + rxattr->fault_win = true; + rxattr->notify_disable = true; + rxattr->rx_wcred_mode = true; + rxattr->rx_win_ord_mode = true; + rxattr->rej_no_credit = true; + rxattr->tc_mode = VAS_THRESH_DISABLED; + } else if (cop == VAS_COP_TYPE_FTW) { + rxattr->user_win = true; + rxattr->intr_disable = true; + + /* + * As noted in the VAS Workbook we disable credit checks. + * If we enable credit checks in the future, we must also + * implement a mechanism to return the user credits or new + * paste operations will fail. + */ + } +} +EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); + +struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, + struct vas_rx_win_attr *rxattr) +{ + struct pnv_vas_window *rxwin; + struct vas_winctx winctx; + struct vas_instance *vinst; + + trace_vas_rx_win_open(current, vasid, cop, rxattr); + + if (!rx_win_args_valid(cop, rxattr)) + return ERR_PTR(-EINVAL); + + vinst = find_vas_instance(vasid); + if (!vinst) { + pr_devel("vasid %d not found!\n", vasid); + return ERR_PTR(-EINVAL); + } + pr_devel("Found instance %d\n", vasid); + + rxwin = vas_window_alloc(vinst); + if (IS_ERR(rxwin)) { + pr_devel("Unable to allocate memory for Rx window\n"); + return (struct vas_window *)rxwin; + } + + rxwin->tx_win = false; + rxwin->nx_win = rxattr->nx_win; + rxwin->user_win = rxattr->user_win; + rxwin->vas_win.cop = cop; + rxwin->vas_win.wcreds_max = rxattr->wcreds_max; + + init_winctx_for_rxwin(rxwin, rxattr, &winctx); + init_winctx_regs(rxwin, &winctx); + + set_vinst_win(vinst, rxwin); + + return &rxwin->vas_win; +} +EXPORT_SYMBOL_GPL(vas_rx_win_open); + +void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) +{ + memset(txattr, 0, sizeof(*txattr)); + + if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI || + cop == VAS_COP_TYPE_GZIP || cop == VAS_COP_TYPE_GZIP_HIPRI) { + txattr->rej_no_credit = false; + txattr->rx_wcred_mode = true; + txattr->tx_wcred_mode = true; + txattr->rx_win_ord_mode = true; + txattr->tx_win_ord_mode = true; + } else if (cop == VAS_COP_TYPE_FTW) { + txattr->user_win = true; + } +} +EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); + +static void init_winctx_for_txwin(struct pnv_vas_window *txwin, + struct vas_tx_win_attr *txattr, + struct vas_winctx *winctx) +{ + /* + * We first zero all fields and only set non-zero ones. Following + * are some fields set to 0/false for the stated reason: + * + * ->notify_os_intr_reg In powernv, send intrs to HV + * ->rsvd_txbuf_count Not supported yet. + * ->notify_disable False for NX windows + * ->xtra_write False for NX windows + * ->notify_early NA for NX windows + * ->lnotify_lpid NA for Tx windows + * ->lnotify_pid NA for Tx windows + * ->lnotify_tid NA for Tx windows + * ->tx_win_cred_mode Ignore for now for NX windows + * ->rx_win_cred_mode Ignore for now for NX windows + */ + memset(winctx, 0, sizeof(struct vas_winctx)); + + winctx->wcreds_max = txwin->vas_win.wcreds_max; + + winctx->user_win = txattr->user_win; + winctx->nx_win = txwin->rxwin->nx_win; + winctx->pin_win = txattr->pin_win; + winctx->rej_no_credit = txattr->rej_no_credit; + winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable; + + winctx->rx_wcred_mode = txattr->rx_wcred_mode; + winctx->tx_wcred_mode = txattr->tx_wcred_mode; + winctx->rx_word_mode = txattr->rx_win_ord_mode; + winctx->tx_word_mode = txattr->tx_win_ord_mode; + winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count; + + winctx->intr_disable = true; + if (winctx->nx_win) + winctx->data_stamp = true; + + winctx->lpid = txattr->lpid; + winctx->pidr = txattr->pidr; + winctx->rx_win_id = txwin->rxwin->vas_win.winid; + /* + * IRQ and fault window setup is successful. Set fault window + * for the send window so that ready to handle faults. + */ + if (txwin->vinst->virq) + winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid; + + winctx->dma_type = VAS_DMA_TYPE_INJECT; + winctx->tc_mode = txattr->tc_mode; + winctx->min_scope = VAS_SCOPE_LOCAL; + winctx->max_scope = VAS_SCOPE_VECTORED_GROUP; + if (txwin->vinst->virq) + winctx->irq_port = txwin->vinst->irq_port; + + winctx->pswid = txattr->pswid ? txattr->pswid : + encode_pswid(txwin->vinst->vas_id, + txwin->vas_win.winid); +} + +static bool tx_win_args_valid(enum vas_cop_type cop, + struct vas_tx_win_attr *attr) +{ + if (attr->tc_mode != VAS_THRESH_DISABLED) + return false; + + if (cop > VAS_COP_TYPE_MAX) + return false; + + if (attr->wcreds_max > VAS_TX_WCREDS_MAX) + return false; + + if (attr->user_win) { + if (attr->rsvd_txbuf_count) + return false; + + if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP && + cop != VAS_COP_TYPE_GZIP_HIPRI) + return false; + } + + return true; +} + +struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, + struct vas_tx_win_attr *attr) +{ + int rc; + struct pnv_vas_window *txwin; + struct pnv_vas_window *rxwin; + struct vas_winctx winctx; + struct vas_instance *vinst; + + trace_vas_tx_win_open(current, vasid, cop, attr); + + if (!tx_win_args_valid(cop, attr)) + return ERR_PTR(-EINVAL); + + /* + * If caller did not specify a vasid but specified the PSWID of a + * receive window (applicable only to FTW windows), use the vasid + * from that receive window. + */ + if (vasid == -1 && attr->pswid) + decode_pswid(attr->pswid, &vasid, NULL); + + vinst = find_vas_instance(vasid); + if (!vinst) { + pr_devel("vasid %d not found!\n", vasid); + return ERR_PTR(-EINVAL); + } + + rxwin = get_vinst_rxwin(vinst, cop, attr->pswid); + if (IS_ERR(rxwin)) { + pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop); + return (struct vas_window *)rxwin; + } + + txwin = vas_window_alloc(vinst); + if (IS_ERR(txwin)) { + rc = PTR_ERR(txwin); + goto put_rxwin; + } + + txwin->vas_win.cop = cop; + txwin->tx_win = 1; + txwin->rxwin = rxwin; + txwin->nx_win = txwin->rxwin->nx_win; + txwin->user_win = attr->user_win; + txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; + + init_winctx_for_txwin(txwin, attr, &winctx); + + init_winctx_regs(txwin, &winctx); + + /* + * If its a kernel send window, map the window address into the + * kernel's address space. For user windows, user must issue an + * mmap() to map the window into their address space. + * + * NOTE: If kernel ever resubmits a user CRB after handling a page + * fault, we will need to map this into kernel as well. + */ + if (!txwin->user_win) { + txwin->paste_kaddr = map_paste_region(txwin); + if (IS_ERR(txwin->paste_kaddr)) { + rc = PTR_ERR(txwin->paste_kaddr); + goto free_window; + } + } else { + /* + * Interrupt hanlder or fault window setup failed. Means + * NX can not generate fault for page fault. So not + * opening for user space tx window. + */ + if (!vinst->virq) { + rc = -ENODEV; + goto free_window; + } + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto free_window; + + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + } + + set_vinst_win(vinst, txwin); + + return &txwin->vas_win; + +free_window: + vas_window_free(txwin); + +put_rxwin: + put_rx_win(rxwin); + return ERR_PTR(rc); + +} +EXPORT_SYMBOL_GPL(vas_tx_win_open); + +int vas_copy_crb(void *crb, int offset) +{ + return vas_copy(crb, offset); +} +EXPORT_SYMBOL_GPL(vas_copy_crb); + +#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) +int vas_paste_crb(struct vas_window *vwin, int offset, bool re) +{ + struct pnv_vas_window *txwin; + int rc; + void *addr; + uint64_t val; + + txwin = container_of(vwin, struct pnv_vas_window, vas_win); + trace_vas_paste_crb(current, txwin); + + /* + * Only NX windows are supported for now and hardware assumes + * report-enable flag is set for NX windows. Ensure software + * complies too. + */ + WARN_ON_ONCE(txwin->nx_win && !re); + + addr = txwin->paste_kaddr; + if (re) { + /* + * Set the REPORT_ENABLE bit (equivalent to writing + * to 1K offset of the paste address) + */ + val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1); + addr += val; + } + + /* + * Map the raw CR value from vas_paste() to an error code (there + * is just pass or fail for now though). + */ + rc = vas_paste(addr, offset); + if (rc == 2) + rc = 0; + else + rc = -EINVAL; + + pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid, + read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_paste_crb); + +/* + * If credit checking is enabled for this window, poll for the return + * of window credits (i.e for NX engines to process any outstanding CRBs). + * Since NX-842 waits for the CRBs to be processed before closing the + * window, we should not have to wait for too long. + * + * TODO: We retry in 10ms intervals now. We could/should probably peek at + * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending + * CRBs on the FIFO and compute the delay dynamically on each retry. + * But that is not really needed until we support NX-GZIP access from + * user space. (NX-842 driver waits for CSB and Fast thread-wakeup + * doesn't use credit checking). + */ +static void poll_window_credits(struct pnv_vas_window *window) +{ + u64 val; + int creds, mode; + int count = 0; + + val = read_hvwc_reg(window, VREG(WINCTL)); + if (window->tx_win) + mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val); + else + mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val); + + if (!mode) + return; +retry: + if (window->tx_win) { + val = read_hvwc_reg(window, VREG(TX_WCRED)); + creds = GET_FIELD(VAS_TX_WCRED, val); + } else { + val = read_hvwc_reg(window, VREG(LRX_WCRED)); + creds = GET_FIELD(VAS_LRX_WCRED, val); + } + + /* + * Takes around few milliseconds to complete all pending requests + * and return credits. + * TODO: Scan fault FIFO and invalidate CRBs points to this window + * and issue CRB Kill to stop all pending requests. Need only + * if there is a bug in NX or fault handling in kernel. + */ + if (creds < window->vas_win.wcreds_max) { + val = 0; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Process can not close send window until all credits are + * returned. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", + vas_window_pid(&window->vas_win), + window->vas_win.winid, + creds, count); + + goto retry; + } +} + +/* + * Wait for the window to go to "not-busy" state. It should only take a + * short time to queue a CRB, so window should not be busy for too long. + * Trying 5ms intervals. + */ +static void poll_window_busy_state(struct pnv_vas_window *window) +{ + int busy; + u64 val; + int count = 0; + +retry: + val = read_hvwc_reg(window, VREG(WIN_STATUS)); + busy = GET_FIELD(VAS_WIN_BUSY, val); + if (busy) { + val = 0; + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(msecs_to_jiffies(10)); + count++; + /* + * Takes around few milliseconds to process all pending + * requests. + */ + if (!(count % 1000)) + pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", + vas_window_pid(&window->vas_win), + window->vas_win.winid, count); + + goto retry; + } +} + +/* + * Have the hardware cast a window out of cache and wait for it to + * be completed. + * + * NOTE: It can take a relatively long time to cast the window context + * out of the cache. It is not strictly necessary to cast out if: + * + * - we clear the "Pin Window" bit (so hardware is free to evict) + * + * - we re-initialize the window context when it is reassigned. + * + * We do the former in vas_win_close() and latter in vas_win_open(). + * So, ignoring the cast-out for now. We can add it as needed. If + * casting out becomes necessary we should consider offloading the + * job to a worker thread, so the window close can proceed quickly. + */ +static void poll_window_castout(struct pnv_vas_window *window) +{ + /* stub for now */ +} + +/* + * Unpin and close a window so no new requests are accepted and the + * hardware can evict this window from cache if necessary. + */ +static void unpin_close_window(struct pnv_vas_window *window) +{ + u64 val; + + val = read_hvwc_reg(window, VREG(WINCTL)); + val = SET_FIELD(VAS_WINCTL_PIN, val, 0); + val = SET_FIELD(VAS_WINCTL_OPEN, val, 0); + write_hvwc_reg(window, VREG(WINCTL), val); +} + +/* + * Close a window. + * + * See Section 1.12.1 of VAS workbook v1.05 for details on closing window: + * - Disable new paste operations (unmap paste address) + * - Poll for the "Window Busy" bit to be cleared + * - Clear the Open/Enable bit for the Window. + * - Poll for return of window Credits (implies FIFO empty for Rx win?) + * - Unpin and cast window context out of cache + * + * Besides the hardware, kernel has some bookkeeping of course. + */ +int vas_win_close(struct vas_window *vwin) +{ + struct pnv_vas_window *window; + + if (!vwin) + return 0; + + window = container_of(vwin, struct pnv_vas_window, vas_win); + + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { + pr_devel("Attempting to close an active Rx window!\n"); + WARN_ON_ONCE(1); + return -EBUSY; + } + + unmap_paste_region(window); + + poll_window_busy_state(window); + + unpin_close_window(window); + + poll_window_credits(window); + + clear_vinst_win(window); + + poll_window_castout(window); + + /* if send window, drop reference to matching receive window */ + if (window->tx_win) { + if (window->user_win) { + mm_context_remove_vas_window(vwin->task_ref.mm); + put_vas_user_win_ref(&vwin->task_ref); + } + put_rx_win(window->rxwin); + } + + vas_window_free(window); + + return 0; +} +EXPORT_SYMBOL_GPL(vas_win_close); + +/* + * Return credit for the given window. + * Send windows and fault window uses credit mechanism as follows: + * + * Send windows: + * - The default number of credits available for each send window is + * 1024. It means 1024 requests can be issued asynchronously at the + * same time. If the credit is not available, that request will be + * returned with RMA_Busy. + * - One credit is taken when NX request is issued. + * - This credit is returned after NX processed that request. + * - If NX encounters translation error, kernel will return the + * credit on the specific send window after processing the fault CRB. + * + * Fault window: + * - The total number credits available is FIFO_SIZE/CRB_SIZE. + * Means 4MB/128 in the current implementation. If credit is not + * available, RMA_Reject is returned. + * - A credit is taken when NX pastes CRB in fault FIFO. + * - The kernel with return credit on fault window after reading entry + * from fault FIFO. + */ +void vas_return_credit(struct pnv_vas_window *window, bool tx) +{ + uint64_t val; + + val = 0ULL; + if (tx) { /* send window */ + val = SET_FIELD(VAS_TX_WCRED, val, 1); + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), val); + } else { + val = SET_FIELD(VAS_LRX_WCRED, val, 1); + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), val); + } +} + +struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid) +{ + struct pnv_vas_window *window; + int winid; + + if (!pswid) { + pr_devel("%s: called for pswid 0!\n", __func__); + return ERR_PTR(-ESRCH); + } + + decode_pswid(pswid, NULL, &winid); + + if (winid >= VAS_WINDOWS_PER_CHIP) + return ERR_PTR(-ESRCH); + + /* + * If application closes the window before the hardware + * returns the fault CRB, we should wait in vas_win_close() + * for the pending requests. so the window must be active + * and the process alive. + * + * If its a kernel process, we should not get any faults and + * should not get here. + */ + window = vinst->windows[winid]; + + if (!window) { + pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n", + winid, pswid, vinst); + return NULL; + } + + /* + * Do some sanity checks on the decoded window. Window should be + * NX GZIP user send window. FTW windows should not incur faults + * since their CRBs are ignored (not queued on FIFO or processed + * by NX). + */ + if (!window->tx_win || !window->user_win || !window->nx_win || + window->vas_win.cop == VAS_COP_TYPE_FAULT || + window->vas_win.cop == VAS_COP_TYPE_FTW) { + pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", + winid, window->tx_win, window->user_win, + window->nx_win, window->vas_win.cop); + WARN_ON(1); + } + + return window; +} + +static struct vas_window *vas_user_win_open(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + struct vas_tx_win_attr txattr = {}; + + vas_init_tx_win_attr(&txattr, cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + return vas_tx_win_open(vas_id, cop_type, &txattr); +} + +static u64 vas_user_win_paste_addr(struct vas_window *txwin) +{ + struct pnv_vas_window *win; + u64 paste_addr; + + win = container_of(txwin, struct pnv_vas_window, vas_win); + vas_win_paste_addr(win, &paste_addr, NULL); + + return paste_addr; +} + +static int vas_user_win_close(struct vas_window *txwin) +{ + vas_win_close(txwin); + + return 0; +} + +static const struct vas_user_win_ops vops = { + .open_win = vas_user_win_open, + .paste_addr = vas_user_win_paste_addr, + .close_win = vas_user_win_close, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + + return vas_register_coproc_api(mod, cop_type, name, &vops); +} +EXPORT_SYMBOL_GPL(vas_register_api_powernv); + +void vas_unregister_api_powernv(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_powernv); diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c new file mode 100644 index 0000000000..b65256a63e --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2016-17 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vas.h" + +DEFINE_MUTEX(vas_mutex); +static LIST_HEAD(vas_instances); + +static DEFINE_PER_CPU(int, cpu_vas_id); + +static int vas_irq_fault_window_setup(struct vas_instance *vinst) +{ + int rc = 0; + + rc = request_threaded_irq(vinst->virq, vas_fault_handler, + vas_fault_thread_fn, 0, vinst->name, vinst); + + if (rc) { + pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n", + vinst->vas_id, vinst->virq, rc); + goto out; + } + + rc = vas_setup_fault_window(vinst); + if (rc) + free_irq(vinst->virq, vinst); + +out: + return rc; +} + +static int init_vas_instance(struct platform_device *pdev) +{ + struct device_node *dn = pdev->dev.of_node; + struct vas_instance *vinst; + struct xive_irq_data *xd; + uint32_t chipid, hwirq; + struct resource *res; + int rc, cpu, vasid; + + rc = of_property_read_u32(dn, "ibm,vas-id", &vasid); + if (rc) { + pr_err("No ibm,vas-id property for %s?\n", pdev->name); + return -ENODEV; + } + + rc = of_property_read_u32(dn, "ibm,chip-id", &chipid); + if (rc) { + pr_err("No ibm,chip-id property for %s?\n", pdev->name); + return -ENODEV; + } + + if (pdev->num_resources != 4) { + pr_err("Unexpected DT configuration for [%s, %d]\n", + pdev->name, vasid); + return -ENODEV; + } + + vinst = kzalloc(sizeof(*vinst), GFP_KERNEL); + if (!vinst) + return -ENOMEM; + + vinst->name = kasprintf(GFP_KERNEL, "vas-%d", vasid); + if (!vinst->name) { + kfree(vinst); + return -ENOMEM; + } + + INIT_LIST_HEAD(&vinst->node); + ida_init(&vinst->ida); + mutex_init(&vinst->mutex); + vinst->vas_id = vasid; + vinst->pdev = pdev; + + res = &pdev->resource[0]; + vinst->hvwc_bar_start = res->start; + + res = &pdev->resource[1]; + vinst->uwc_bar_start = res->start; + + res = &pdev->resource[2]; + vinst->paste_base_addr = res->start; + + res = &pdev->resource[3]; + if (res->end > 62) { + pr_err("Bad 'paste_win_id_shift' in DT, %llx\n", res->end); + goto free_vinst; + } + + vinst->paste_win_id_shift = 63 - res->end; + + hwirq = xive_native_alloc_irq_on_chip(chipid); + if (!hwirq) { + pr_err("Inst%d: Unable to allocate global irq for chip %d\n", + vinst->vas_id, chipid); + return -ENOENT; + } + + vinst->virq = irq_create_mapping(NULL, hwirq); + if (!vinst->virq) { + pr_err("Inst%d: Unable to map global irq %d\n", + vinst->vas_id, hwirq); + return -EINVAL; + } + + xd = irq_get_handler_data(vinst->virq); + if (!xd) { + pr_err("Inst%d: Invalid virq %d\n", + vinst->vas_id, vinst->virq); + return -EINVAL; + } + + vinst->irq_port = xd->trig_page; + pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n", + pdev->name, vasid, vinst->paste_base_addr, + vinst->paste_win_id_shift, vinst->virq, + vinst->irq_port); + + for_each_possible_cpu(cpu) { + if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn)) + per_cpu(cpu_vas_id, cpu) = vasid; + } + + mutex_lock(&vas_mutex); + list_add(&vinst->node, &vas_instances); + mutex_unlock(&vas_mutex); + + spin_lock_init(&vinst->fault_lock); + /* + * IRQ and fault handling setup is needed only for user space + * send windows. + */ + if (vinst->virq) { + rc = vas_irq_fault_window_setup(vinst); + /* + * Fault window is used only for user space send windows. + * So if vinst->virq is NULL, tx_win_open returns -ENODEV + * for user space. + */ + if (rc) + vinst->virq = 0; + } + + vas_instance_init_dbgdir(vinst); + + dev_set_drvdata(&pdev->dev, vinst); + + return 0; + +free_vinst: + kfree(vinst->name); + kfree(vinst); + return -ENODEV; + +} + +/* + * Although this is read/used multiple times, it is written to only + * during initialization. + */ +struct vas_instance *find_vas_instance(int vasid) +{ + struct list_head *ent; + struct vas_instance *vinst; + + mutex_lock(&vas_mutex); + + if (vasid == -1) + vasid = per_cpu(cpu_vas_id, smp_processor_id()); + + list_for_each(ent, &vas_instances) { + vinst = list_entry(ent, struct vas_instance, node); + if (vinst->vas_id == vasid) { + mutex_unlock(&vas_mutex); + return vinst; + } + } + mutex_unlock(&vas_mutex); + + pr_devel("Instance %d not found\n", vasid); + return NULL; +} + +int chip_to_vas_id(int chipid) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu_to_chip_id(cpu) == chipid) + return per_cpu(cpu_vas_id, cpu); + } + return -1; +} +EXPORT_SYMBOL(chip_to_vas_id); + +static int vas_probe(struct platform_device *pdev) +{ + return init_vas_instance(pdev); +} + +static const struct of_device_id powernv_vas_match[] = { + { .compatible = "ibm,vas",}, + {}, +}; + +static struct platform_driver vas_driver = { + .driver = { + .name = "vas", + .of_match_table = powernv_vas_match, + }, + .probe = vas_probe, +}; + +static int __init vas_init(void) +{ + int found = 0; + struct device_node *dn; + + platform_driver_register(&vas_driver); + + for_each_compatible_node(dn, NULL, "ibm,vas") { + of_platform_device_create(dn, NULL, NULL); + found++; + } + + if (!found) { + platform_driver_unregister(&vas_driver); + return -ENODEV; + } + + pr_devel("Found %d instances\n", found); + + return 0; +} +device_initcall(vas_init); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h new file mode 100644 index 0000000000..08d9d3d5a2 --- /dev/null +++ b/arch/powerpc/platforms/powernv/vas.h @@ -0,0 +1,501 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2016-17 IBM Corp. + */ + +#ifndef _VAS_H +#define _VAS_H +#include +#include +#include +#include +#include +#include +#include + +/* + * Overview of Virtual Accelerator Switchboard (VAS). + * + * VAS is a hardware "switchboard" that allows senders and receivers to + * exchange messages with _minimal_ kernel involvment. The receivers are + * typically NX coprocessor engines that perform compression or encryption + * in hardware, but receivers can also be other software threads. + * + * Senders are user/kernel threads that submit compression/encryption or + * other requests to the receivers. Senders must format their messages as + * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and + * "paste" instructions which were introduced in Power9. + * + * A Power node can have (upto?) 8 Power chips. There is one instance of + * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports, + * Senders and receivers must each connect to a separate window before they + * can exchange messages through the switchboard. + * + * Each window is described by two types of window contexts: + * + * Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes + * + * OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes. + * + * A window context can be viewed as a set of 64-bit registers. The settings + * in these registers configure/control/determine the behavior of the VAS + * hardware when messages are sent/received through the window. The registers + * in the HVWC are configured by the kernel while the registers in the UWC can + * be configured by the kernel or by the user space application that is using + * the window. + * + * The HVWCs for all windows on a specific instance of VAS are in a contiguous + * range of hardware addresses or Base address region (BAR) referred to as the + * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance + * are referred to as the UWC BAR for the instance. + * + * The two BARs for each instance are defined Power9 MMIO Ranges spreadsheet + * and available to the kernel in the VAS node's "reg" property in the device + * tree: + * + * /proc/device-tree/vasm@.../reg + * + * (see vas_probe() for details on the reg property). + * + * The kernel maps the HVWC and UWC BAR regions into the kernel address + * space (hvwc_map and uwc_map). The kernel can then access the window + * contexts of a specific window using: + * + * hvwc = hvwc_map + winid * VAS_HVWC_SIZE. + * uwc = uwc_map + winid * VAS_UWC_SIZE. + * + * where winid is the window index (0..64K). + * + * As mentioned, a window context is used to "configure" a window. Besides + * this configuration address, each _send_ window also has a unique hardware + * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()). + * + * The hardware paste address for a window is computed using the "paste + * base address" and "paste win id shift" reg properties in the VAS device + * tree node using: + * + * paste_addr = paste_base + ((winid << paste_win_id_shift)) + * + * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift). + * + * The kernel maps this hardware address into the sender's address space + * after which they can use the 'paste' instruction (new in Power9) to + * send a message (submit a request aka CRB) to the coprocessor. + * + * NOTE: In the initial version, senders can only in-kernel drivers/threads. + * Support for user space threads will be added in follow-on patches. + * + * TODO: Do we need to map the UWC into user address space so they can return + * credits? Its NA for NX but may be needed for other receive windows. + * + */ + +#define VAS_WINDOWS_PER_CHIP (64 << 10) + +/* + * Hypervisor and OS/USer Window Context sizes + */ +#define VAS_HVWC_SIZE 512 +#define VAS_UWC_SIZE PAGE_SIZE + +/* + * Initial per-process credits. + * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED) + * + * TODO: Needs tuning for per-process credits + */ +#define VAS_TX_WCREDS_MAX ((4 << 10) - 1) +#define VAS_WCREDS_DEFAULT (1 << 10) + +/* + * VAS Window Context Register Offsets and bitmasks. + * See Section 3.1.4 of VAS Work book + */ +#define VAS_LPID_OFFSET 0x010 +#define VAS_LPID PPC_BITMASK(0, 11) + +#define VAS_PID_OFFSET 0x018 +#define VAS_PID_ID PPC_BITMASK(0, 19) + +#define VAS_XLATE_MSR_OFFSET 0x020 +#define VAS_XLATE_MSR_DR PPC_BIT(0) +#define VAS_XLATE_MSR_TA PPC_BIT(1) +#define VAS_XLATE_MSR_PR PPC_BIT(2) +#define VAS_XLATE_MSR_US PPC_BIT(3) +#define VAS_XLATE_MSR_HV PPC_BIT(4) +#define VAS_XLATE_MSR_SF PPC_BIT(5) + +#define VAS_XLATE_LPCR_OFFSET 0x028 +#define VAS_XLATE_LPCR_PAGE_SIZE PPC_BITMASK(0, 2) +#define VAS_XLATE_LPCR_ISL PPC_BIT(3) +#define VAS_XLATE_LPCR_TC PPC_BIT(4) +#define VAS_XLATE_LPCR_SC PPC_BIT(5) + +#define VAS_XLATE_CTL_OFFSET 0x030 +#define VAS_XLATE_MODE PPC_BITMASK(0, 1) + +#define VAS_AMR_OFFSET 0x040 +#define VAS_AMR PPC_BITMASK(0, 63) + +#define VAS_SEIDR_OFFSET 0x048 +#define VAS_SEIDR PPC_BITMASK(0, 63) + +#define VAS_FAULT_TX_WIN_OFFSET 0x050 +#define VAS_FAULT_TX_WIN PPC_BITMASK(48, 63) + +#define VAS_OSU_INTR_SRC_RA_OFFSET 0x060 +#define VAS_OSU_INTR_SRC_RA PPC_BITMASK(8, 63) + +#define VAS_HV_INTR_SRC_RA_OFFSET 0x070 +#define VAS_HV_INTR_SRC_RA PPC_BITMASK(8, 63) + +#define VAS_PSWID_OFFSET 0x078 +#define VAS_PSWID_EA_HANDLE PPC_BITMASK(0, 31) + +#define VAS_SPARE1_OFFSET 0x080 +#define VAS_SPARE2_OFFSET 0x088 +#define VAS_SPARE3_OFFSET 0x090 +#define VAS_SPARE4_OFFSET 0x130 +#define VAS_SPARE5_OFFSET 0x160 +#define VAS_SPARE6_OFFSET 0x188 + +#define VAS_LFIFO_BAR_OFFSET 0x0A0 +#define VAS_LFIFO_BAR PPC_BITMASK(8, 53) +#define VAS_PAGE_MIGRATION_SELECT PPC_BITMASK(54, 56) + +#define VAS_LDATA_STAMP_CTL_OFFSET 0x0A8 +#define VAS_LDATA_STAMP PPC_BITMASK(0, 1) +#define VAS_XTRA_WRITE PPC_BIT(2) + +#define VAS_LDMA_CACHE_CTL_OFFSET 0x0B0 +#define VAS_LDMA_TYPE PPC_BITMASK(0, 1) +#define VAS_LDMA_FIFO_DISABLE PPC_BIT(2) + +#define VAS_LRFIFO_PUSH_OFFSET 0x0B8 +#define VAS_LRFIFO_PUSH PPC_BITMASK(0, 15) + +#define VAS_CURR_MSG_COUNT_OFFSET 0x0C0 +#define VAS_CURR_MSG_COUNT PPC_BITMASK(0, 7) + +#define VAS_LNOTIFY_AFTER_COUNT_OFFSET 0x0C8 +#define VAS_LNOTIFY_AFTER_COUNT PPC_BITMASK(0, 7) + +#define VAS_LRX_WCRED_OFFSET 0x0E0 +#define VAS_LRX_WCRED PPC_BITMASK(0, 15) + +#define VAS_LRX_WCRED_ADDER_OFFSET 0x190 +#define VAS_LRX_WCRED_ADDER PPC_BITMASK(0, 15) + +#define VAS_TX_WCRED_OFFSET 0x0F0 +#define VAS_TX_WCRED PPC_BITMASK(4, 15) + +#define VAS_TX_WCRED_ADDER_OFFSET 0x1A0 +#define VAS_TX_WCRED_ADDER PPC_BITMASK(4, 15) + +#define VAS_LFIFO_SIZE_OFFSET 0x100 +#define VAS_LFIFO_SIZE PPC_BITMASK(0, 3) + +#define VAS_WINCTL_OFFSET 0x108 +#define VAS_WINCTL_OPEN PPC_BIT(0) +#define VAS_WINCTL_REJ_NO_CREDIT PPC_BIT(1) +#define VAS_WINCTL_PIN PPC_BIT(2) +#define VAS_WINCTL_TX_WCRED_MODE PPC_BIT(3) +#define VAS_WINCTL_RX_WCRED_MODE PPC_BIT(4) +#define VAS_WINCTL_TX_WORD_MODE PPC_BIT(5) +#define VAS_WINCTL_RX_WORD_MODE PPC_BIT(6) +#define VAS_WINCTL_RSVD_TXBUF PPC_BIT(7) +#define VAS_WINCTL_THRESH_CTL PPC_BITMASK(8, 9) +#define VAS_WINCTL_FAULT_WIN PPC_BIT(10) +#define VAS_WINCTL_NX_WIN PPC_BIT(11) + +#define VAS_WIN_STATUS_OFFSET 0x110 +#define VAS_WIN_BUSY PPC_BIT(1) + +#define VAS_WIN_CTX_CACHING_CTL_OFFSET 0x118 +#define VAS_CASTOUT_REQ PPC_BIT(0) +#define VAS_PUSH_TO_MEM PPC_BIT(1) +#define VAS_WIN_CACHE_STATUS PPC_BIT(4) + +#define VAS_TX_RSVD_BUF_COUNT_OFFSET 0x120 +#define VAS_RXVD_BUF_COUNT PPC_BITMASK(58, 63) + +#define VAS_LRFIFO_WIN_PTR_OFFSET 0x128 +#define VAS_LRX_WIN_ID PPC_BITMASK(0, 15) + +/* + * Local Notification Control Register controls what happens in _response_ + * to a paste command and hence applies only to receive windows. + */ +#define VAS_LNOTIFY_CTL_OFFSET 0x138 +#define VAS_NOTIFY_DISABLE PPC_BIT(0) +#define VAS_INTR_DISABLE PPC_BIT(1) +#define VAS_NOTIFY_EARLY PPC_BIT(2) +#define VAS_NOTIFY_OSU_INTR PPC_BIT(3) + +#define VAS_LNOTIFY_PID_OFFSET 0x140 +#define VAS_LNOTIFY_PID PPC_BITMASK(0, 19) + +#define VAS_LNOTIFY_LPID_OFFSET 0x148 +#define VAS_LNOTIFY_LPID PPC_BITMASK(0, 11) + +#define VAS_LNOTIFY_TID_OFFSET 0x150 +#define VAS_LNOTIFY_TID PPC_BITMASK(0, 15) + +#define VAS_LNOTIFY_SCOPE_OFFSET 0x158 +#define VAS_LNOTIFY_MIN_SCOPE PPC_BITMASK(0, 1) +#define VAS_LNOTIFY_MAX_SCOPE PPC_BITMASK(2, 3) + +#define VAS_NX_UTIL_OFFSET 0x1B0 +#define VAS_NX_UTIL PPC_BITMASK(0, 63) + +/* SE: Side effects */ +#define VAS_NX_UTIL_SE_OFFSET 0x1B8 +#define VAS_NX_UTIL_SE PPC_BITMASK(0, 63) + +#define VAS_NX_UTIL_ADDER_OFFSET 0x180 +#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63) + +/* + * VREG(x): + * Expand a register's short name (eg: LPID) into two parameters: + * - the register's short name in string form ("LPID"), and + * - the name of the macro (eg: VAS_LPID_OFFSET), defining the + * register's offset in the window context + */ +#define VREG_SFX(n, s) __stringify(n), VAS_##n##s +#define VREG(r) VREG_SFX(r, _OFFSET) + +/* + * Local Notify Scope Control Register. (Receive windows only). + */ +enum vas_notify_scope { + VAS_SCOPE_LOCAL, + VAS_SCOPE_GROUP, + VAS_SCOPE_VECTORED_GROUP, + VAS_SCOPE_UNUSED, +}; + +/* + * Local DMA Cache Control Register (Receive windows only). + */ +enum vas_dma_type { + VAS_DMA_TYPE_INJECT, + VAS_DMA_TYPE_WRITE, +}; + +/* + * Local Notify Scope Control Register. (Receive windows only). + * Not applicable to NX receive windows. + */ +enum vas_notify_after_count { + VAS_NOTIFY_AFTER_256 = 0, + VAS_NOTIFY_NONE, + VAS_NOTIFY_AFTER_2 +}; + +/* + * NX can generate an interrupt for multiple faults and expects kernel + * to process all of them. So read all valid CRB entries until find the + * invalid one. So use pswid which is pasted by NX and ccw[0] (reserved + * bit in BE) to check valid CRB. CCW[0] will not be touched by user + * space. Application gets CRB formt error if it updates this bit. + * + * Invalidate FIFO during allocation and process all entries from last + * successful read until finds invalid pswid and ccw[0] values. + * After reading each CRB entry from fault FIFO, the kernel invalidate + * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with + * CCW0_INVALID. + */ +#define FIFO_INVALID_ENTRY 0xffffffff +#define CCW0_INVALID 1 + +/* + * One per instance of VAS. Each instance will have a separate set of + * receive windows, one per coprocessor type. + * + * See also function header of set_vinst_win() for details on ->windows[] + * and ->rxwin[] tables. + */ +struct vas_instance { + int vas_id; + struct ida ida; + struct list_head node; + struct platform_device *pdev; + + u64 hvwc_bar_start; + u64 uwc_bar_start; + u64 paste_base_addr; + u64 paste_win_id_shift; + + u64 irq_port; + int virq; + int fault_crbs; + int fault_fifo_size; + int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ + spinlock_t fault_lock; /* Protects fifo_in_progress update */ + void *fault_fifo; + struct pnv_vas_window *fault_win; /* Fault window */ + + struct mutex mutex; + struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX]; + struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP]; + + char *name; + char *dbgname; + struct dentry *dbgdir; +}; + +/* + * In-kernel state a VAS window on PowerNV. One per window. + */ +struct pnv_vas_window { + struct vas_window vas_win; + /* Fields common to send and receive windows */ + struct vas_instance *vinst; + bool tx_win; /* True if send window */ + bool nx_win; /* True if NX window */ + bool user_win; /* True if user space window */ + void *hvwc_map; /* HV window context */ + void *uwc_map; /* OS/User window context */ + + /* Fields applicable only to send windows */ + void *paste_kaddr; + char *paste_addr_name; + struct pnv_vas_window *rxwin; + + /* Fields applicable only to receive windows */ + atomic_t num_txwins; +}; + +/* + * Container for the hardware state of a window. One per-window. + * + * A VAS Window context is a 512-byte area in the hardware that contains + * a set of 64-bit registers. Individual bit-fields in these registers + * determine the configuration/operation of the hardware. struct vas_winctx + * is a container for the register fields in the window context. + */ +struct vas_winctx { + u64 rx_fifo; + int rx_fifo_size; + int wcreds_max; + int rsvd_txbuf_count; + + bool user_win; + bool nx_win; + bool fault_win; + bool rsvd_txbuf_enable; + bool pin_win; + bool rej_no_credit; + bool tx_wcred_mode; + bool rx_wcred_mode; + bool tx_word_mode; + bool rx_word_mode; + bool data_stamp; + bool xtra_write; + bool notify_disable; + bool intr_disable; + bool fifo_disable; + bool notify_early; + bool notify_os_intr_reg; + + int lpid; + int pidr; /* value from SPRN_PID, not linux pid */ + int lnotify_lpid; + int lnotify_pid; + int lnotify_tid; + u32 pswid; + int rx_win_id; + int fault_win_id; + int tc_mode; + + u64 irq_port; + + enum vas_dma_type dma_type; + enum vas_notify_scope min_scope; + enum vas_notify_scope max_scope; + enum vas_notify_after_count notify_after_count; +}; + +extern struct mutex vas_mutex; + +extern struct vas_instance *find_vas_instance(int vasid); +extern void vas_init_dbgdir(void); +extern void vas_instance_init_dbgdir(struct vas_instance *vinst); +extern void vas_window_init_dbgdir(struct pnv_vas_window *win); +extern void vas_window_free_dbgdir(struct pnv_vas_window *win); +extern int vas_setup_fault_window(struct vas_instance *vinst); +extern irqreturn_t vas_fault_thread_fn(int irq, void *data); +extern irqreturn_t vas_fault_handler(int irq, void *dev_id); +extern void vas_return_credit(struct pnv_vas_window *window, bool tx); +extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, + uint32_t pswid); +extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, + int *len); + +static inline int vas_window_pid(struct vas_window *window) +{ + return pid_vnr(window->task_ref.pid); +} + +static inline void vas_log_write(struct pnv_vas_window *win, char *name, + void *regptr, u64 val) +{ + if (val) + pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n", + win->tx_win ? "Tx" : "Rx", win->vas_win.winid, + name, regptr, val); +} + +static inline void write_uwc_reg(struct pnv_vas_window *win, char *name, + s32 reg, u64 val) +{ + void *regptr; + + regptr = win->uwc_map + reg; + vas_log_write(win, name, regptr, val); + + out_be64(regptr, val); +} + +static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name, + s32 reg, u64 val) +{ + void *regptr; + + regptr = win->hvwc_map + reg; + vas_log_write(win, name, regptr, val); + + out_be64(regptr, val); +} + +static inline u64 read_hvwc_reg(struct pnv_vas_window *win, + char *name __maybe_unused, s32 reg) +{ + return in_be64(win->hvwc_map+reg); +} + +/* + * Encode/decode the Partition Send Window ID (PSWID) for a window in + * a way that we can uniquely identify any window in the system. i.e. + * we should be able to locate the 'struct vas_window' given the PSWID. + * + * Bits Usage + * 0:7 VAS id (8 bits) + * 8:15 Unused, 0 (3 bits) + * 16:31 Window id (16 bits) + */ +static inline u32 encode_pswid(int vasid, int winid) +{ + return ((u32)winid | (vasid << (31 - 7))); +} + +static inline void decode_pswid(u32 pswid, int *vasid, int *winid) +{ + if (vasid) + *vasid = pswid >> (31 - 7) & 0xFF; + + if (winid) + *winid = pswid & 0xFFFF; +} +#endif /* _VAS_H */ diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig new file mode 100644 index 0000000000..a44869e5ea --- /dev/null +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -0,0 +1,182 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_PS3 + bool "Sony PS3" + depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN + select PPC_CELL + select USB_OHCI_LITTLE_ENDIAN + select USB_OHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO + select HAVE_PCI + select IRQ_DOMAIN_NOMAP + help + This option enables support for the Sony PS3 game console + and other platforms using the PS3 hypervisor. Enabling this + option will allow building otheros.bld, a kernel image suitable + for programming into flash memory, and vmlinux, a kernel image + suitable for loading via kexec. + +menu "PS3 Platform Options" + depends on PPC_PS3 + +config PS3_ADVANCED + depends on PPC_PS3 + bool "PS3 Advanced configuration options" + help + This gives you access to some advanced options for the PS3. The + defaults should be fine for most users, but these options may make + it possible to better control the kernel configuration if you know + what you are doing. + + Note that the answer to this question won't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about these options. + + Most users should say N to this question. + +config PS3_HTAB_SIZE + depends on PPC_PS3 + int "PS3 Platform pagetable size" if PS3_ADVANCED + range 18 20 + default 20 + help + This option is only for experts who may have the desire to fine + tune the pagetable size on their system. The value here is + expressed as the log2 of the page table size. Valid values are + 18, 19, and 20, corresponding to 256KB, 512KB and 1MB respectively. + + If unsure, choose the default (20) with the confidence that your + system will have optimal runtime performance. + +config PS3_DYNAMIC_DMA + depends on PPC_PS3 + bool "PS3 Platform dynamic DMA page table management" + help + This option will enable kernel support to take advantage of the + per device dynamic DMA page table management provided by the Cell + processor's IO Controller. This support incurs some runtime + overhead and also slightly increases kernel memory usage. The + current implementation should be considered experimental. + + This support is mainly for Linux kernel development. If unsure, + say N. + +config PS3_VUART + depends on PPC_PS3 + tristate + +config PS3_PS3AV + depends on PPC_PS3 + tristate "PS3 AV settings driver" if PS3_ADVANCED + select PS3_VUART + default y + help + Include support for the PS3 AV Settings driver. + + This support is required for PS3 graphics and sound. In + general, all users will say Y or M. + +config PS3_SYS_MANAGER + depends on PPC_PS3 + tristate "PS3 System Manager driver" if PS3_ADVANCED + select PS3_VUART + default y + help + Include support for the PS3 System Manager. + + This support is required for PS3 system control. In + general, all users will say Y or M. + +config PS3_VERBOSE_RESULT + bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED + depends on PPC_PS3 + help + Enables more verbose log messages for LV1 hypercall results. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + +config PS3_REPOSITORY_WRITE + bool "PS3 Repository write support" if PS3_ADVANCED + depends on PPC_PS3 + help + Enables support for writing to the PS3 System Repository. + + This support is intended for bootloaders that need to store data + in the repository for later boot stages. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + +config PS3_STORAGE + depends on PPC_PS3 + tristate + +config PS3_DISK + tristate "PS3 Disk Storage Driver" + depends on PPC_PS3 && BLOCK + select PS3_STORAGE + help + Include support for the PS3 Disk Storage. + + This support is required to access the PS3 hard disk. + In general, all users will say Y or M. + +config PS3_ROM + tristate "PS3 BD/DVD/CD-ROM Storage Driver" + depends on PPC_PS3 && SCSI + select PS3_STORAGE + help + Include support for the PS3 ROM Storage. + + This support is required to access the PS3 BD/DVD/CD-ROM drive. + In general, all users will say Y or M. + Also make sure to say Y or M to "SCSI CDROM support" later. + +config PS3_FLASH + tristate "PS3 FLASH ROM Storage Driver" + depends on PPC_PS3 + select PS3_STORAGE + help + Include support for the PS3 FLASH ROM Storage. + + This support is required to access the PS3 FLASH ROM, which + contains the boot loader and some boot options. + In general, PS3 OtherOS users will say Y or M. + + As this driver needs a fixed buffer of 256 KiB of memory, it can + be disabled on the kernel command line using "ps3flash=off", to + not allocate this fixed buffer. + +config PS3_VRAM + tristate "PS3 Video RAM Storage Driver" + depends on FB_PS3=y && BLOCK && m + help + This driver allows you to use excess PS3 video RAM as volatile + storage or system swap. + +config PS3_LPM + tristate "PS3 Logical Performance Monitor support" + depends on PPC_PS3 + help + Include support for the PS3 Logical Performance Monitor. + + This support is required to use the logical performance monitor + of the PS3's LV1 hypervisor. + + If you intend to use the advanced performance monitoring and + profiling support of the Cell processor with programs like + perfmon2, then say Y or M, otherwise say N. + +config PS3GELIC_UDBG + bool "PS3 udbg output via UDP broadcasts on Ethernet" + depends on PPC_PS3 + help + Enables udbg early debugging output by sending broadcast UDP + via the Ethernet port (UDP port number 18194). + + This driver uses a trivial implementation and is independent + from the main PS3 gelic network driver. + + If in doubt, say N here. + +endmenu diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile new file mode 100644 index 0000000000..86bf2967a8 --- /dev/null +++ b/arch/powerpc/platforms/ps3/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o +obj-y += interrupt.o exports.o os-area.o +obj-y += system-bus.o + +obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SPU_BASE) += spu.o +obj-y += device-init.o diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c new file mode 100644 index 0000000000..e87360a0fb --- /dev/null +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -0,0 +1,975 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 device registration routines. + * + * Copyright (C) 2007 Sony Computer Entertainment Inc. + * Copyright 2007 Sony Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "platform.h" + +static int __init ps3_register_lpm_devices(void) +{ + int result; + u64 tmp1; + u64 tmp2; + struct ps3_system_bus_device *dev; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->match_id = PS3_MATCH_ID_LPM; + dev->dev_type = PS3_DEVICE_TYPE_LPM; + + /* The current lpm driver only supports a single BE processor. */ + + result = ps3_repository_read_be_node_id(0, &dev->lpm.node_id); + + if (result) { + pr_debug("%s:%d: ps3_repository_read_be_node_id failed \n", + __func__, __LINE__); + goto fail_read_repo; + } + + result = ps3_repository_read_lpm_privileges(dev->lpm.node_id, &tmp1, + &dev->lpm.rights); + + if (result) { + pr_debug("%s:%d: ps3_repository_read_lpm_privileges failed\n", + __func__, __LINE__); + goto fail_read_repo; + } + + lv1_get_logical_partition_id(&tmp2); + + if (tmp1 != tmp2) { + pr_debug("%s:%d: wrong lpar\n", + __func__, __LINE__); + result = -ENODEV; + goto fail_rights; + } + + if (!(dev->lpm.rights & PS3_LPM_RIGHTS_USE_LPM)) { + pr_debug("%s:%d: don't have rights to use lpm\n", + __func__, __LINE__); + result = -EPERM; + goto fail_rights; + } + + pr_debug("%s:%d: pu_id %llu, rights %llu(%llxh)\n", + __func__, __LINE__, dev->lpm.pu_id, dev->lpm.rights, + dev->lpm.rights); + + result = ps3_repository_read_pu_id(0, &dev->lpm.pu_id); + + if (result) { + pr_debug("%s:%d: ps3_repository_read_pu_id failed \n", + __func__, __LINE__); + goto fail_read_repo; + } + + result = ps3_system_bus_device_register(dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + + +fail_register: +fail_rights: +fail_read_repo: + kfree(dev); + pr_debug(" <- %s:%d: failed\n", __func__, __LINE__); + return result; +} + +/** + * ps3_setup_gelic_device - Setup and register a gelic device instance. + * + * Allocates memory for a struct ps3_system_bus_device instance, initialises the + * structure members, and registers the device instance with the system bus. + */ + +static int __init ps3_setup_gelic_device( + const struct ps3_repository_device *repo) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + struct ps3_dma_region d_region; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB); + BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_GELIC); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) { + result = -ENOMEM; + goto fail_malloc; + } + + p->dev.match_id = PS3_MATCH_ID_GELIC; + p->dev.dev_type = PS3_DEVICE_TYPE_SB; + p->dev.bus_id = repo->bus_id; + p->dev.dev_id = repo->dev_id; + p->dev.d_region = &p->d_region; + + result = ps3_repository_find_interrupt(repo, + PS3_INTERRUPT_TYPE_EVENT_PORT, &p->dev.interrupt_id); + + if (result) { + pr_debug("%s:%d ps3_repository_find_interrupt failed\n", + __func__, __LINE__); + goto fail_find_interrupt; + } + + BUG_ON(p->dev.interrupt_id != 0); + + result = ps3_dma_region_init(&p->dev, p->dev.d_region, PS3_DMA_64K, + PS3_DMA_OTHER, NULL, 0); + + if (result) { + pr_debug("%s:%d ps3_dma_region_init failed\n", + __func__, __LINE__); + goto fail_dma_init; + } + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; + +fail_device_register: +fail_dma_init: +fail_find_interrupt: + kfree(p); +fail_malloc: + pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__); + return result; +} + +static int __ref ps3_setup_uhc_device( + const struct ps3_repository_device *repo, enum ps3_match_id match_id, + enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + struct ps3_dma_region d_region; + struct ps3_mmio_region m_region; + } *p; + u64 bus_addr; + u64 len; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB); + BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_USB); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) { + result = -ENOMEM; + goto fail_malloc; + } + + p->dev.match_id = match_id; + p->dev.dev_type = PS3_DEVICE_TYPE_SB; + p->dev.bus_id = repo->bus_id; + p->dev.dev_id = repo->dev_id; + p->dev.d_region = &p->d_region; + p->dev.m_region = &p->m_region; + + result = ps3_repository_find_interrupt(repo, + interrupt_type, &p->dev.interrupt_id); + + if (result) { + pr_debug("%s:%d ps3_repository_find_interrupt failed\n", + __func__, __LINE__); + goto fail_find_interrupt; + } + + result = ps3_repository_find_reg(repo, reg_type, + &bus_addr, &len); + + if (result) { + pr_debug("%s:%d ps3_repository_find_reg failed\n", + __func__, __LINE__); + goto fail_find_reg; + } + + result = ps3_dma_region_init(&p->dev, p->dev.d_region, PS3_DMA_64K, + PS3_DMA_INTERNAL, NULL, 0); + + if (result) { + pr_debug("%s:%d ps3_dma_region_init failed\n", + __func__, __LINE__); + goto fail_dma_init; + } + + result = ps3_mmio_region_init(&p->dev, p->dev.m_region, bus_addr, len, + PS3_MMIO_4K); + + if (result) { + pr_debug("%s:%d ps3_mmio_region_init failed\n", + __func__, __LINE__); + goto fail_mmio_init; + } + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; + +fail_device_register: +fail_mmio_init: +fail_dma_init: +fail_find_reg: +fail_find_interrupt: + kfree(p); +fail_malloc: + pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__); + return result; +} + +static int __init ps3_setup_ehci_device( + const struct ps3_repository_device *repo) +{ + return ps3_setup_uhc_device(repo, PS3_MATCH_ID_EHCI, + PS3_INTERRUPT_TYPE_SB_EHCI, PS3_REG_TYPE_SB_EHCI); +} + +static int __init ps3_setup_ohci_device( + const struct ps3_repository_device *repo) +{ + return ps3_setup_uhc_device(repo, PS3_MATCH_ID_OHCI, + PS3_INTERRUPT_TYPE_SB_OHCI, PS3_REG_TYPE_SB_OHCI); +} + +static int __init ps3_setup_vuart_device(enum ps3_match_id match_id, + unsigned int port_number) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + } *p; + + pr_debug(" -> %s:%d: match_id %u, port %u\n", __func__, __LINE__, + match_id, port_number); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) + return -ENOMEM; + + p->dev.match_id = match_id; + p->dev.dev_type = PS3_DEVICE_TYPE_VUART; + p->dev.port_number = port_number; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d fail\n", __func__, __LINE__); + return result; +} + +static int ps3_setup_storage_dev(const struct ps3_repository_device *repo, + enum ps3_match_id match_id) +{ + int result; + struct ps3_storage_device *p; + u64 port, blk_size, num_blocks; + unsigned int num_regions, i; + + pr_debug(" -> %s:%u: match_id %u\n", __func__, __LINE__, match_id); + + result = ps3_repository_read_stor_dev_info(repo->bus_index, + repo->dev_index, &port, + &blk_size, &num_blocks, + &num_regions); + if (result) { + printk(KERN_ERR "%s:%u: _read_stor_dev_info failed %d\n", + __func__, __LINE__, result); + return -ENODEV; + } + + pr_debug("%s:%u: (%u:%u:%u): port %llu blk_size %llu num_blocks %llu " + "num_regions %u\n", __func__, __LINE__, repo->bus_index, + repo->dev_index, repo->dev_type, port, blk_size, num_blocks, + num_regions); + + p = kzalloc(struct_size(p, regions, num_regions), GFP_KERNEL); + if (!p) { + result = -ENOMEM; + goto fail_malloc; + } + + p->sbd.match_id = match_id; + p->sbd.dev_type = PS3_DEVICE_TYPE_SB; + p->sbd.bus_id = repo->bus_id; + p->sbd.dev_id = repo->dev_id; + p->sbd.d_region = &p->dma_region; + p->blk_size = blk_size; + p->num_regions = num_regions; + + result = ps3_repository_find_interrupt(repo, + PS3_INTERRUPT_TYPE_EVENT_PORT, + &p->sbd.interrupt_id); + if (result) { + printk(KERN_ERR "%s:%u: find_interrupt failed %d\n", __func__, + __LINE__, result); + result = -ENODEV; + goto fail_find_interrupt; + } + + for (i = 0; i < num_regions; i++) { + unsigned int id; + u64 start, size; + + result = ps3_repository_read_stor_dev_region(repo->bus_index, + repo->dev_index, + i, &id, &start, + &size); + if (result) { + printk(KERN_ERR + "%s:%u: read_stor_dev_region failed %d\n", + __func__, __LINE__, result); + result = -ENODEV; + goto fail_read_region; + } + pr_debug("%s:%u: region %u: id %u start %llu size %llu\n", + __func__, __LINE__, i, id, start, size); + + p->regions[i].id = id; + p->regions[i].start = start; + p->regions[i].size = size; + } + + result = ps3_system_bus_device_register(&p->sbd); + if (result) { + pr_debug("%s:%u ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%u\n", __func__, __LINE__); + return 0; + +fail_device_register: +fail_read_region: +fail_find_interrupt: + kfree(p); +fail_malloc: + pr_debug(" <- %s:%u: fail.\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_vuart_devices(void) +{ + int result; + unsigned int port_number; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + result = ps3_repository_read_vuart_av_port(&port_number); + if (result) + port_number = 0; /* av default */ + + result = ps3_setup_vuart_device(PS3_MATCH_ID_AV_SETTINGS, port_number); + WARN_ON(result); + + result = ps3_repository_read_vuart_sysmgr_port(&port_number); + if (result) + port_number = 2; /* sysmgr default */ + + result = ps3_setup_vuart_device(PS3_MATCH_ID_SYSTEM_MANAGER, + port_number); + WARN_ON(result); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_sound_devices(void) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + struct ps3_dma_region d_region; + struct ps3_mmio_region m_region; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + p->dev.match_id = PS3_MATCH_ID_SOUND; + p->dev.dev_type = PS3_DEVICE_TYPE_IOC0; + p->dev.d_region = &p->d_region; + p->dev.m_region = &p->m_region; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d failed\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_graphics_devices(void) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) + return -ENOMEM; + + p->dev.match_id = PS3_MATCH_ID_GPU; + p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_FB; + p->dev.dev_type = PS3_DEVICE_TYPE_IOC0; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d failed\n", __func__, __LINE__); + return result; +} + +static int __init ps3_register_ramdisk_device(void) +{ + int result; + struct layout { + struct ps3_system_bus_device dev; + } *p; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + p = kzalloc(sizeof(struct layout), GFP_KERNEL); + + if (!p) + return -ENOMEM; + + p->dev.match_id = PS3_MATCH_ID_GPU; + p->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK; + p->dev.dev_type = PS3_DEVICE_TYPE_IOC0; + + result = ps3_system_bus_device_register(&p->dev); + + if (result) { + pr_debug("%s:%d ps3_system_bus_device_register failed\n", + __func__, __LINE__); + goto fail_device_register; + } + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; + +fail_device_register: + kfree(p); + pr_debug(" <- %s:%d failed\n", __func__, __LINE__); + return result; +} + +/** + * ps3_setup_dynamic_device - Setup a dynamic device from the repository + */ + +static int ps3_setup_dynamic_device(const struct ps3_repository_device *repo) +{ + int result; + + switch (repo->dev_type) { + case PS3_DEV_TYPE_STOR_DISK: + result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_DISK); + + /* Some devices are not accessible from the Other OS lpar. */ + if (result == -ENODEV) { + result = 0; + pr_debug("%s:%u: not accessible\n", __func__, + __LINE__); + } + + if (result) + pr_debug("%s:%u ps3_setup_storage_dev failed\n", + __func__, __LINE__); + break; + + case PS3_DEV_TYPE_STOR_ROM: + result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_ROM); + if (result) + pr_debug("%s:%u ps3_setup_storage_dev failed\n", + __func__, __LINE__); + break; + + case PS3_DEV_TYPE_STOR_FLASH: + result = ps3_setup_storage_dev(repo, PS3_MATCH_ID_STOR_FLASH); + if (result) + pr_debug("%s:%u ps3_setup_storage_dev failed\n", + __func__, __LINE__); + break; + + default: + result = 0; + pr_debug("%s:%u: unsupported dev_type %u\n", __func__, __LINE__, + repo->dev_type); + } + + return result; +} + +/** + * ps3_setup_static_device - Setup a static device from the repository + */ + +static int __init ps3_setup_static_device(const struct ps3_repository_device *repo) +{ + int result; + + switch (repo->dev_type) { + case PS3_DEV_TYPE_SB_GELIC: + result = ps3_setup_gelic_device(repo); + if (result) { + pr_debug("%s:%d ps3_setup_gelic_device failed\n", + __func__, __LINE__); + } + break; + case PS3_DEV_TYPE_SB_USB: + + /* Each USB device has both an EHCI and an OHCI HC */ + + result = ps3_setup_ehci_device(repo); + + if (result) { + pr_debug("%s:%d ps3_setup_ehci_device failed\n", + __func__, __LINE__); + } + + result = ps3_setup_ohci_device(repo); + + if (result) { + pr_debug("%s:%d ps3_setup_ohci_device failed\n", + __func__, __LINE__); + } + break; + + default: + return ps3_setup_dynamic_device(repo); + } + + return result; +} + +static void ps3_find_and_add_device(u64 bus_id, u64 dev_id) +{ + struct ps3_repository_device repo; + int res; + unsigned int retries; + unsigned long rem; + + /* + * On some firmware versions (e.g. 1.90), the device may not show up + * in the repository immediately + */ + for (retries = 0; retries < 10; retries++) { + res = ps3_repository_find_device_by_id(&repo, bus_id, dev_id); + if (!res) + goto found; + + rem = msleep_interruptible(100); + if (rem) + break; + } + pr_warn("%s:%u: device %llu:%llu not found\n", + __func__, __LINE__, bus_id, dev_id); + return; + +found: + if (retries) + pr_debug("%s:%u: device %llu:%llu found after %u retries\n", + __func__, __LINE__, bus_id, dev_id, retries); + + ps3_setup_dynamic_device(&repo); + return; +} + +#define PS3_NOTIFICATION_DEV_ID ULONG_MAX +#define PS3_NOTIFICATION_INTERRUPT_ID 0 + +struct ps3_notification_device { + struct ps3_system_bus_device sbd; + spinlock_t lock; + u64 tag; + u64 lv1_status; + struct rcuwait wait; + bool done; +}; + +enum ps3_notify_type { + notify_device_ready = 0, + notify_region_probe = 1, + notify_region_update = 2, +}; + +struct ps3_notify_cmd { + u64 operation_code; /* must be zero */ + u64 event_mask; /* OR of 1UL << enum ps3_notify_type */ +}; + +struct ps3_notify_event { + u64 event_type; /* enum ps3_notify_type */ + u64 bus_id; + u64 dev_id; + u64 dev_type; + u64 dev_port; +}; + +static irqreturn_t ps3_notification_interrupt(int irq, void *data) +{ + struct ps3_notification_device *dev = data; + int res; + u64 tag, status; + + spin_lock(&dev->lock); + res = lv1_storage_get_async_status(PS3_NOTIFICATION_DEV_ID, &tag, + &status); + if (tag != dev->tag) + pr_err("%s:%u: tag mismatch, got %llx, expected %llx\n", + __func__, __LINE__, tag, dev->tag); + + if (res) { + pr_err("%s:%u: res %d status 0x%llx\n", __func__, __LINE__, res, + status); + } else { + pr_debug("%s:%u: completed, status 0x%llx\n", __func__, + __LINE__, status); + dev->lv1_status = status; + dev->done = true; + rcuwait_wake_up(&dev->wait); + } + spin_unlock(&dev->lock); + return IRQ_HANDLED; +} + +static int ps3_notification_read_write(struct ps3_notification_device *dev, + u64 lpar, int write) +{ + const char *op = write ? "write" : "read"; + unsigned long flags; + int res; + + spin_lock_irqsave(&dev->lock, flags); + res = write ? lv1_storage_write(dev->sbd.dev_id, 0, 0, 1, 0, lpar, + &dev->tag) + : lv1_storage_read(dev->sbd.dev_id, 0, 0, 1, 0, lpar, + &dev->tag); + dev->done = false; + spin_unlock_irqrestore(&dev->lock, flags); + if (res) { + pr_err("%s:%u: %s failed %d\n", __func__, __LINE__, op, res); + return -EPERM; + } + pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); + + rcuwait_wait_event(&dev->wait, dev->done || kthread_should_stop(), TASK_IDLE); + + if (kthread_should_stop()) + res = -EINTR; + + if (dev->lv1_status) { + pr_err("%s:%u: %s not completed, status 0x%llx\n", __func__, + __LINE__, op, dev->lv1_status); + return -EIO; + } + pr_debug("%s:%u: notification %s completed\n", __func__, __LINE__, op); + + return 0; +} + +static struct task_struct *probe_task; + +/** + * ps3_probe_thread - Background repository probing at system startup. + * + * This implementation only supports background probing on a single bus. + * It uses the hypervisor's storage device notification mechanism to wait until + * a storage device is ready. The device notification mechanism uses a + * pseudo device to asynchronously notify the guest when storage devices become + * ready. The notification device has a block size of 512 bytes. + */ + +static int ps3_probe_thread(void *data) +{ + struct ps3_notification_device dev; + int res; + unsigned int irq; + u64 lpar; + void *buf; + struct ps3_notify_cmd *notify_cmd; + struct ps3_notify_event *notify_event; + + pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__); + + buf = kzalloc(512, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + lpar = ps3_mm_phys_to_lpar(__pa(buf)); + notify_cmd = buf; + notify_event = buf; + + /* dummy system bus device */ + dev.sbd.bus_id = (u64)data; + dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID; + dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID; + + res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0); + if (res) { + pr_err("%s:%u: lv1_open_device failed %s\n", __func__, + __LINE__, ps3_result(res)); + goto fail_free; + } + + res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY, + &irq); + if (res) { + pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n", + __func__, __LINE__, res); + goto fail_close_device; + } + + spin_lock_init(&dev.lock); + rcuwait_init(&dev.wait); + + res = request_irq(irq, ps3_notification_interrupt, 0, + "ps3_notification", &dev); + if (res) { + pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__, + res); + goto fail_sb_event_receive_port_destroy; + } + + /* Setup and write the request for device notification. */ + notify_cmd->operation_code = 0; /* must be zero */ + notify_cmd->event_mask = 1UL << notify_region_probe; + + res = ps3_notification_read_write(&dev, lpar, 1); + if (res) + goto fail_free_irq; + + /* Loop here processing the requested notification events. */ + do { + try_to_freeze(); + + memset(notify_event, 0, sizeof(*notify_event)); + + res = ps3_notification_read_write(&dev, lpar, 0); + if (res) + break; + + pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu" + " type %llu port %llu\n", __func__, __LINE__, + notify_event->event_type, notify_event->bus_id, + notify_event->dev_id, notify_event->dev_type, + notify_event->dev_port); + + if (notify_event->event_type != notify_region_probe || + notify_event->bus_id != dev.sbd.bus_id) { + pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n", + __func__, __LINE__, notify_event->event_type, + notify_event->dev_id, notify_event->dev_type); + continue; + } + + ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id); + + } while (!kthread_should_stop()); + +fail_free_irq: + free_irq(irq, &dev); +fail_sb_event_receive_port_destroy: + ps3_sb_event_receive_port_destroy(&dev.sbd, irq); +fail_close_device: + lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id); +fail_free: + kfree(buf); + + probe_task = NULL; + + pr_debug(" <- %s:%u: kthread finished\n", __func__, __LINE__); + + return 0; +} + +/** + * ps3_stop_probe_thread - Stops the background probe thread. + * + */ + +static int ps3_stop_probe_thread(struct notifier_block *nb, unsigned long code, + void *data) +{ + if (probe_task) + kthread_stop(probe_task); + return 0; +} + +static struct notifier_block nb = { + .notifier_call = ps3_stop_probe_thread +}; + +/** + * ps3_start_probe_thread - Starts the background probe thread. + * + */ + +static int __init ps3_start_probe_thread(enum ps3_bus_type bus_type) +{ + int result; + struct task_struct *task; + struct ps3_repository_device repo; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + memset(&repo, 0, sizeof(repo)); + + repo.bus_type = bus_type; + + result = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index); + + if (result) { + printk(KERN_ERR "%s: Cannot find bus (%d)\n", __func__, result); + return -ENODEV; + } + + result = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id); + + if (result) { + printk(KERN_ERR "%s: read_bus_id failed %d\n", __func__, + result); + return -ENODEV; + } + + task = kthread_run(ps3_probe_thread, (void *)repo.bus_id, + "ps3-probe-%u", bus_type); + + if (IS_ERR(task)) { + result = PTR_ERR(task); + printk(KERN_ERR "%s: kthread_run failed %d\n", __func__, + result); + return result; + } + + probe_task = task; + register_reboot_notifier(&nb); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; +} + +/** + * ps3_register_devices - Probe the system and register devices found. + * + * A device_initcall() routine. + */ + +static int __init ps3_register_devices(void) +{ + int result; + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + /* ps3_repository_dump_bus_info(); */ + + result = ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE); + + ps3_register_vuart_devices(); + + ps3_register_graphics_devices(); + + ps3_repository_find_devices(PS3_BUS_TYPE_SB, ps3_setup_static_device); + + ps3_register_sound_devices(); + + ps3_register_lpm_devices(); + + ps3_register_ramdisk_device(); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return 0; +} + +device_initcall(ps3_register_devices); diff --git a/arch/powerpc/platforms/ps3/exports.c b/arch/powerpc/platforms/ps3/exports.c new file mode 100644 index 0000000000..1ac31abcf9 --- /dev/null +++ b/arch/powerpc/platforms/ps3/exports.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 hvcall exports for modules. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#define LV1_CALL(name, in, out, num) \ + extern s64 _lv1_##name(LV1_##in##_IN_##out##_OUT_ARG_DECL); \ + EXPORT_SYMBOL(_lv1_##name); + +#include diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c new file mode 100644 index 0000000000..6b298010fd --- /dev/null +++ b/arch/powerpc/platforms/ps3/gelic_udbg.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * udbg debug output routine via GELIC UDP broadcasts + * + * Copyright (C) 2007 Sony Computer Entertainment Inc. + * Copyright 2006, 2007 Sony Corporation + * Copyright (C) 2010 Hector Martin + * Copyright (C) 2011 Andre Heider + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define GELIC_BUS_ID 1 +#define GELIC_DEVICE_ID 0 +#define GELIC_DEBUG_PORT 18194 +#define GELIC_MAX_MESSAGE_SIZE 1000 + +#define GELIC_LV1_GET_MAC_ADDRESS 1 +#define GELIC_LV1_GET_VLAN_ID 4 +#define GELIC_LV1_VLAN_TX_ETHERNET_0 2 + +#define GELIC_DESCR_DMA_STAT_MASK 0xf0000000 +#define GELIC_DESCR_DMA_CARDOWNED 0xa0000000 + +#define GELIC_DESCR_TX_DMA_IKE 0x00080000 +#define GELIC_DESCR_TX_DMA_NO_CHKSUM 0x00000000 +#define GELIC_DESCR_TX_DMA_FRAME_TAIL 0x00040000 + +#define GELIC_DESCR_DMA_CMD_NO_CHKSUM (GELIC_DESCR_DMA_CARDOWNED | \ + GELIC_DESCR_TX_DMA_IKE | \ + GELIC_DESCR_TX_DMA_NO_CHKSUM) + +static u64 bus_addr; + +struct gelic_descr { + /* as defined by the hardware */ + __be32 buf_addr; + __be32 buf_size; + __be32 next_descr_addr; + __be32 dmac_cmd_status; + __be32 result_size; + __be32 valid_size; /* all zeroes for tx */ + __be32 data_status; + __be32 data_error; /* all zeroes for tx */ +} __attribute__((aligned(32))); + +struct debug_block { + struct gelic_descr descr; + u8 pkt[1520]; +} __packed; + +static __iomem struct ethhdr *h_eth; +static __iomem struct vlan_hdr *h_vlan; +static __iomem struct iphdr *h_ip; +static __iomem struct udphdr *h_udp; + +static __iomem char *pmsg; +static __iomem char *pmsgc; + +static __iomem struct debug_block dbg __attribute__((aligned(32))); + +static int header_size; + +static void map_dma_mem(int bus_id, int dev_id, void *start, size_t len, + u64 *real_bus_addr) +{ + s64 result; + u64 real_addr = ((u64)start) & 0x0fffffffffffffffUL; + u64 real_end = real_addr + len; + u64 map_start = real_addr & ~0xfff; + u64 map_end = (real_end + 0xfff) & ~0xfff; + u64 bus_addr = 0; + + u64 flags = 0xf800000000000000UL; + + result = lv1_allocate_device_dma_region(bus_id, dev_id, + map_end - map_start, 12, 0, + &bus_addr); + if (result) + lv1_panic(0); + + result = lv1_map_device_dma_region(bus_id, dev_id, map_start, + bus_addr, map_end - map_start, + flags); + if (result) + lv1_panic(0); + + *real_bus_addr = bus_addr + real_addr - map_start; +} + +static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len) +{ + s64 result; + u64 real_bus_addr; + + real_bus_addr = bus_addr & ~0xfff; + len += bus_addr - real_bus_addr; + len = (len + 0xfff) & ~0xfff; + + result = lv1_unmap_device_dma_region(bus_id, dev_id, real_bus_addr, + len); + if (result) + return result; + + return lv1_free_device_dma_region(bus_id, dev_id, real_bus_addr); +} + +static void __init gelic_debug_init(void) +{ + s64 result; + u64 v2; + u64 mac; + u64 vlan_id; + + result = lv1_open_device(GELIC_BUS_ID, GELIC_DEVICE_ID, 0); + if (result) + lv1_panic(0); + + map_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID, &dbg, sizeof(dbg), + &bus_addr); + + memset(&dbg, 0, sizeof(dbg)); + + dbg.descr.buf_addr = bus_addr + offsetof(struct debug_block, pkt); + + wmb(); + + result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID, + GELIC_LV1_GET_MAC_ADDRESS, 0, 0, 0, + &mac, &v2); + if (result) + lv1_panic(0); + + mac <<= 16; + + h_eth = (struct ethhdr *)dbg.pkt; + + eth_broadcast_addr(h_eth->h_dest); + memcpy(&h_eth->h_source, &mac, ETH_ALEN); + + header_size = sizeof(struct ethhdr); + + result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID, + GELIC_LV1_GET_VLAN_ID, + GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0, + &vlan_id, &v2); + if (!result) { + h_eth->h_proto= ETH_P_8021Q; + + header_size += sizeof(struct vlan_hdr); + h_vlan = (struct vlan_hdr *)(h_eth + 1); + h_vlan->h_vlan_TCI = vlan_id; + h_vlan->h_vlan_encapsulated_proto = ETH_P_IP; + h_ip = (struct iphdr *)(h_vlan + 1); + } else { + h_eth->h_proto= 0x0800; + h_ip = (struct iphdr *)(h_eth + 1); + } + + header_size += sizeof(struct iphdr); + h_ip->version = 4; + h_ip->ihl = 5; + h_ip->ttl = 10; + h_ip->protocol = 0x11; + h_ip->saddr = 0x00000000; + h_ip->daddr = 0xffffffff; + + header_size += sizeof(struct udphdr); + h_udp = (struct udphdr *)(h_ip + 1); + h_udp->source = GELIC_DEBUG_PORT; + h_udp->dest = GELIC_DEBUG_PORT; + + pmsgc = pmsg = (char *)(h_udp + 1); +} + +static void gelic_debug_shutdown(void) +{ + if (bus_addr) + unmap_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID, + bus_addr, sizeof(dbg)); + lv1_close_device(GELIC_BUS_ID, GELIC_DEVICE_ID); +} + +static void gelic_sendbuf(int msgsize) +{ + u16 *p; + u32 sum; + int i; + + dbg.descr.buf_size = header_size + msgsize; + h_ip->tot_len = msgsize + sizeof(struct udphdr) + + sizeof(struct iphdr); + h_udp->len = msgsize + sizeof(struct udphdr); + + h_ip->check = 0; + sum = 0; + p = (u16 *)h_ip; + for (i = 0; i < 5; i++) + sum += *p++; + h_ip->check = ~(sum + (sum >> 16)); + + dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM | + GELIC_DESCR_TX_DMA_FRAME_TAIL; + dbg.descr.result_size = 0; + dbg.descr.data_status = 0; + + wmb(); + + lv1_net_start_tx_dma(GELIC_BUS_ID, GELIC_DEVICE_ID, bus_addr, 0); + + while ((dbg.descr.dmac_cmd_status & GELIC_DESCR_DMA_STAT_MASK) == + GELIC_DESCR_DMA_CARDOWNED) + cpu_relax(); +} + +static void ps3gelic_udbg_putc(char ch) +{ + *pmsgc++ = ch; + if (ch == '\n' || (pmsgc-pmsg) >= GELIC_MAX_MESSAGE_SIZE) { + gelic_sendbuf(pmsgc-pmsg); + pmsgc = pmsg; + } +} + +void __init udbg_init_ps3gelic(void) +{ + gelic_debug_init(); + udbg_putc = ps3gelic_udbg_putc; +} + +void udbg_shutdown_ps3gelic(void) +{ + udbg_putc = NULL; + gelic_debug_shutdown(); +} +EXPORT_SYMBOL(udbg_shutdown_ps3gelic); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c new file mode 100644 index 0000000000..9de62bd526 --- /dev/null +++ b/arch/powerpc/platforms/ps3/htab.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 pagetable management routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006, 2007 Sony Corporation + */ + +#include +#include + +#include +#include +#include +#include + +#define PS3_VERBOSE_RESULT +#include "platform.h" + +/** + * enum lpar_vas_id - id of LPAR virtual address space. + * @lpar_vas_id_current: Current selected virtual address space + * + * Identify the target LPAR address space. + */ + +enum ps3_lpar_vas_id { + PS3_LPAR_VAS_ID_CURRENT = 0, +}; + + +static DEFINE_SPINLOCK(ps3_htab_lock); + +static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, + unsigned long pa, unsigned long rflags, unsigned long vflags, + int psize, int apsize, int ssize) +{ + int result; + u64 hpte_v, hpte_r; + u64 inserted_index; + u64 evicted_v, evicted_r; + u64 hpte_v_array[4], hpte_rs; + unsigned long flags; + long ret = -1; + + /* + * lv1_insert_htab_entry() will search for victim + * entry in both primary and secondary pte group + */ + vflags &= ~HPTE_V_SECONDARY; + + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; + + spin_lock_irqsave(&ps3_htab_lock, flags); + + /* talk hvc to replace entries BOLTED == 0 */ + result = lv1_insert_htab_entry(PS3_LPAR_VAS_ID_CURRENT, hpte_group, + hpte_v, hpte_r, + HPTE_V_BOLTED, 0, + &inserted_index, + &evicted_v, &evicted_r); + + if (result) { + /* all entries bolted !*/ + pr_info("%s:result=%s vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n", + __func__, ps3_result(result), vpn, pa, hpte_group, + hpte_v, hpte_r); + BUG(); + } + + /* + * see if the entry is inserted into secondary pteg + */ + result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT, + inserted_index & ~0x3UL, + &hpte_v_array[0], &hpte_v_array[1], + &hpte_v_array[2], &hpte_v_array[3], + &hpte_rs); + BUG_ON(result); + + if (hpte_v_array[inserted_index % 4] & HPTE_V_SECONDARY) + ret = (inserted_index & 7) | (1 << 3); + else + ret = inserted_index & 7; + + spin_unlock_irqrestore(&ps3_htab_lock, flags); + + return ret; +} + +static long ps3_hpte_remove(unsigned long hpte_group) +{ + panic("ps3_hpte_remove() not implemented"); + return 0; +} + +static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long vpn, int psize, int apsize, + int ssize, unsigned long inv_flags) +{ + int result; + u64 hpte_v, want_v, hpte_rs; + u64 hpte_v_array[4]; + unsigned long flags; + long ret; + + want_v = hpte_encode_avpn(vpn, psize, ssize); + + spin_lock_irqsave(&ps3_htab_lock, flags); + + result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT, slot & ~0x3UL, + &hpte_v_array[0], &hpte_v_array[1], + &hpte_v_array[2], &hpte_v_array[3], + &hpte_rs); + + if (result) { + pr_info("%s: result=%s read vpn=%lx slot=%lx psize=%d\n", + __func__, ps3_result(result), vpn, slot, psize); + BUG(); + } + + hpte_v = hpte_v_array[slot % 4]; + + /* + * As lv1_read_htab_entries() does not give us the RPN, we can + * not synthesize the new hpte_r value here, and therefore can + * not update the hpte with lv1_insert_htab_entry(), so we + * instead invalidate it and ask the caller to update it via + * ps3_hpte_insert() by returning a -1 value. + */ + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { + /* not found */ + ret = -1; + } else { + /* entry found, just invalidate it */ + result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, + slot, 0, 0); + ret = -1; + } + + spin_unlock_irqrestore(&ps3_htab_lock, flags); + return ret; +} + +static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, + int psize, int ssize) +{ + pr_info("ps3_hpte_updateboltedpp() not implemented"); +} + +static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, + int psize, int apsize, int ssize, int local) +{ + unsigned long flags; + int result; + + spin_lock_irqsave(&ps3_htab_lock, flags); + + result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0); + + if (result) { + pr_info("%s: result=%s vpn=%lx slot=%lx psize=%d\n", + __func__, ps3_result(result), vpn, slot, psize); + BUG(); + } + + spin_unlock_irqrestore(&ps3_htab_lock, flags); +} + +/* Called during kexec sequence with MMU off */ +static notrace void ps3_hpte_clear(void) +{ + unsigned long hpte_count = (1UL << ppc64_pft_size) >> 4; + u64 i; + + for (i = 0; i < hpte_count; i++) + lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, i, 0, 0); + + ps3_mm_shutdown(); + ps3_mm_vas_destroy(); +} + +void __init ps3_hpte_init(unsigned long htab_size) +{ + mmu_hash_ops.hpte_invalidate = ps3_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = ps3_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = ps3_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = ps3_hpte_insert; + mmu_hash_ops.hpte_remove = ps3_hpte_remove; + mmu_hash_ops.hpte_clear_all = ps3_hpte_clear; + + ppc64_pft_size = __ilog2(htab_size); +} + diff --git a/arch/powerpc/platforms/ps3/hvcall.S b/arch/powerpc/platforms/ps3/hvcall.S new file mode 100644 index 0000000000..509e30ad01 --- /dev/null +++ b/arch/powerpc/platforms/ps3/hvcall.S @@ -0,0 +1,792 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PS3 hvcall interface. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * Copyright 2003, 2004 (c) MontaVista Software, Inc. + */ + +#include +#include + +#define lv1call .long 0x44000022; extsw r3, r3 + +#define LV1_N_IN_0_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_1_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_2_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_3_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_4_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_5_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_6_IN_0_OUT LV1_N_IN_0_OUT +#define LV1_7_IN_0_OUT LV1_N_IN_0_OUT + +#define LV1_0_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r3, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r3, -8(r1); \ + stdu r4, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r3, -8(r1); \ + std r4, -16(r1); \ + stdu r5, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_0_IN_7_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r3, -8(r1); \ + std r4, -16(r1); \ + std r5, -24(r1); \ + std r6, -32(r1); \ + std r7, -40(r1); \ + std r8, -48(r1); \ + stdu r9, -56(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 56; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + ld r11, -48(r1); \ + std r9, 0(r11); \ + ld r11, -56(r1); \ + std r10, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r4, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + stdu r5, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + stdu r6, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_4_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + stdu r7, -32(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 32; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_5_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + std r7, -32(r1); \ + stdu r8, -40(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 40; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_6_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + std r7, -32(r1); \ + std r8, -40(r1); \ + stdu r9, -48(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 48; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + ld r11, -48(r1); \ + std r9, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_1_IN_7_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r4, -8(r1); \ + std r5, -16(r1); \ + std r6, -24(r1); \ + std r7, -32(r1); \ + std r8, -40(r1); \ + std r9, -48(r1); \ + stdu r10, -56(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 56; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + ld r11, -48(r1); \ + std r9, 0(r11); \ + ld r11, -56(r1); \ + std r10, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r5, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + stdu r6, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + std r6, -16(r1); \ + stdu r7, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_4_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + std r6, -16(r1); \ + std r7, -24(r1); \ + stdu r8, -32(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 32; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_2_IN_5_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r5, -8(r1); \ + std r6, -16(r1); \ + std r7, -24(r1); \ + std r8, -32(r1); \ + stdu r9, -40(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 40; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + ld r11, -32(r1); \ + std r7, 0(r11); \ + ld r11, -40(r1); \ + std r8, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_3_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r6, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_3_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r6, -8(r1); \ + stdu r7, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_3_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r6, -8(r1); \ + std r7, -16(r1); \ + stdu r8, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_4_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r7, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_4_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r7, -8(r1); \ + stdu r8, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_4_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r7, -8(r1); \ + std r8, -16(r1); \ + stdu r9, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_5_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r8, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_5_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r8, -8(r1); \ + stdu r9, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_5_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r8, -8(r1); \ + std r9, -16(r1); \ + stdu r10, -24(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 24; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, -24(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_6_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r9, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_6_IN_2_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r9, -8(r1); \ + stdu r10, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_6_IN_3_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r9, -8(r1); \ + stdu r10, -16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 16; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + ld r11, -16(r1); \ + std r5, 0(r11); \ + ld r11, 48+8*8(r1); \ + std r6, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_7_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + stdu r10, -8(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + addi r1, r1, 8; \ + ld r11, -8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_7_IN_6_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + std r10, 48+8*7(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + ld r11, 48+8*7(r1); \ + std r4, 0(r11); \ + ld r11, 48+8*8(r1); \ + std r5, 0(r11); \ + ld r11, 48+8*9(r1); \ + std r6, 0(r11); \ + ld r11, 48+8*10(r1); \ + std r7, 0(r11); \ + ld r11, 48+8*11(r1); \ + std r8, 0(r11); \ + ld r11, 48+8*12(r1); \ + std r9, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + +#define LV1_8_IN_1_OUT(API_NAME, API_NUMBER) \ +_GLOBAL(_##API_NAME) \ + \ + mflr r0; \ + std r0, 16(r1); \ + \ + li r11, API_NUMBER; \ + lv1call; \ + \ + ld r11, 48+8*8(r1); \ + std r4, 0(r11); \ + \ + ld r0, 16(r1); \ + mtlr r0; \ + blr + + .text + +/* the lv1 underscored call definitions expand here */ + +#define LV1_CALL(name, in, out, num) LV1_##in##_IN_##out##_OUT(lv1_##name, num) +#include diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c new file mode 100644 index 0000000000..49871427f5 --- /dev/null +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -0,0 +1,783 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 interrupt routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#define FAIL udbg_printf +#else +#define DBG pr_devel +#define FAIL pr_debug +#endif + +/** + * struct ps3_bmp - a per cpu irq status and mask bitmap structure + * @status: 256 bit status bitmap indexed by plug + * @unused_1: Alignment + * @mask: 256 bit mask bitmap indexed by plug + * @unused_2: Alignment + * + * The HV maintains per SMT thread mappings of HV outlet to HV plug on + * behalf of the guest. These mappings are implemented as 256 bit guest + * supplied bitmaps indexed by plug number. The addresses of the bitmaps + * are registered with the HV through lv1_configure_irq_state_bitmap(). + * The HV requires that the 512 bits of status + mask not cross a page + * boundary. PS3_BMP_MINALIGN is used to define this minimal 64 byte + * alignment. + * + * The HV supports 256 plugs per thread, assigned as {0..255}, for a total + * of 512 plugs supported on a processor. To simplify the logic this + * implementation equates HV plug value to Linux virq value, constrains each + * interrupt to have a system wide unique plug number, and limits the range + * of the plug values to map into the first dword of the bitmaps. This + * gives a usable range of plug values of {NR_IRQS_LEGACY..63}. Note + * that there is no constraint on how many in this set an individual thread + * can acquire. + * + * The mask is declared as unsigned long so we can use set/clear_bit on it. + */ + +#define PS3_BMP_MINALIGN 64 + +struct ps3_bmp { + struct { + u64 status; + u64 unused_1[3]; + unsigned long mask; + u64 unused_2[3]; + }; +}; + +/** + * struct ps3_private - a per cpu data structure + * @bmp: ps3_bmp structure + * @bmp_lock: Synchronize access to bmp. + * @ipi_debug_brk_mask: Mask for debug break IPIs + * @ppe_id: HV logical_ppe_id + * @thread_id: HV thread_id + * @ipi_mask: Mask of IPI virqs + */ + +struct ps3_private { + struct ps3_bmp bmp __attribute__ ((aligned (PS3_BMP_MINALIGN))); + spinlock_t bmp_lock; + u64 ppe_id; + u64 thread_id; + unsigned long ipi_debug_brk_mask; + unsigned long ipi_mask; +}; + +static DEFINE_PER_CPU(struct ps3_private, ps3_private); + +/** + * ps3_chip_mask - Set an interrupt mask bit in ps3_bmp. + * @virq: The assigned Linux virq. + * + * Sets ps3_bmp.mask and calls lv1_did_update_interrupt_mask(). + */ + +static void ps3_chip_mask(struct irq_data *d) +{ + struct ps3_private *pd = irq_data_get_irq_chip_data(d); + unsigned long flags; + + DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__, + pd->thread_id, d->irq); + + local_irq_save(flags); + clear_bit(63 - d->irq, &pd->bmp.mask); + lv1_did_update_interrupt_mask(pd->ppe_id, pd->thread_id); + local_irq_restore(flags); +} + +/** + * ps3_chip_unmask - Clear an interrupt mask bit in ps3_bmp. + * @virq: The assigned Linux virq. + * + * Clears ps3_bmp.mask and calls lv1_did_update_interrupt_mask(). + */ + +static void ps3_chip_unmask(struct irq_data *d) +{ + struct ps3_private *pd = irq_data_get_irq_chip_data(d); + unsigned long flags; + + DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__, + pd->thread_id, d->irq); + + local_irq_save(flags); + set_bit(63 - d->irq, &pd->bmp.mask); + lv1_did_update_interrupt_mask(pd->ppe_id, pd->thread_id); + local_irq_restore(flags); +} + +/** + * ps3_chip_eoi - HV end-of-interrupt. + * @virq: The assigned Linux virq. + * + * Calls lv1_end_of_interrupt_ext(). + */ + +static void ps3_chip_eoi(struct irq_data *d) +{ + const struct ps3_private *pd = irq_data_get_irq_chip_data(d); + + /* non-IPIs are EOIed here. */ + + if (!test_bit(63 - d->irq, &pd->ipi_mask)) + lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, d->irq); +} + +/** + * ps3_irq_chip - Represents the ps3_bmp as a Linux struct irq_chip. + */ + +static struct irq_chip ps3_irq_chip = { + .name = "ps3", + .irq_mask = ps3_chip_mask, + .irq_unmask = ps3_chip_unmask, + .irq_eoi = ps3_chip_eoi, +}; + +/** + * ps3_virq_setup - virq related setup. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @outlet: The HV outlet from the various create outlet routines. + * @virq: The assigned Linux virq. + * + * Calls irq_create_mapping() to get a virq and sets the chip data to + * ps3_private data. + */ + +static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet, + unsigned int *virq) +{ + int result; + struct ps3_private *pd; + + /* This defines the default interrupt distribution policy. */ + + if (cpu == PS3_BINDING_CPU_ANY) + cpu = 0; + + pd = &per_cpu(ps3_private, cpu); + + *virq = irq_create_mapping(NULL, outlet); + + if (!*virq) { + FAIL("%s:%d: irq_create_mapping failed: outlet %lu\n", + __func__, __LINE__, outlet); + result = -ENOMEM; + goto fail_create; + } + + DBG("%s:%d: outlet %lu => cpu %u, virq %u\n", __func__, __LINE__, + outlet, cpu, *virq); + + result = irq_set_chip_data(*virq, pd); + + if (result) { + FAIL("%s:%d: irq_set_chip_data failed\n", + __func__, __LINE__); + goto fail_set; + } + + ps3_chip_mask(irq_get_irq_data(*virq)); + + return result; + +fail_set: + irq_dispose_mapping(*virq); +fail_create: + return result; +} + +/** + * ps3_virq_destroy - virq related teardown. + * @virq: The assigned Linux virq. + * + * Clears chip data and calls irq_dispose_mapping() for the virq. + */ + +static int ps3_virq_destroy(unsigned int virq) +{ + const struct ps3_private *pd = irq_get_chip_data(virq); + + DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__, + __LINE__, pd->ppe_id, pd->thread_id, virq); + + irq_set_chip_data(virq, NULL); + irq_dispose_mapping(virq); + + DBG("%s:%d <-\n", __func__, __LINE__); + return 0; +} + +/** + * ps3_irq_plug_setup - Generic outlet and virq related setup. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @outlet: The HV outlet from the various create outlet routines. + * @virq: The assigned Linux virq. + * + * Sets up virq and connects the irq plug. + */ + +int ps3_irq_plug_setup(enum ps3_cpu_binding cpu, unsigned long outlet, + unsigned int *virq) +{ + int result; + struct ps3_private *pd; + + result = ps3_virq_setup(cpu, outlet, virq); + + if (result) { + FAIL("%s:%d: ps3_virq_setup failed\n", __func__, __LINE__); + goto fail_setup; + } + + pd = irq_get_chip_data(*virq); + + /* Binds outlet to cpu + virq. */ + + result = lv1_connect_irq_plug_ext(pd->ppe_id, pd->thread_id, *virq, + outlet, 0); + + if (result) { + FAIL("%s:%d: lv1_connect_irq_plug_ext failed: %s\n", + __func__, __LINE__, ps3_result(result)); + result = -EPERM; + goto fail_connect; + } + + return result; + +fail_connect: + ps3_virq_destroy(*virq); +fail_setup: + return result; +} +EXPORT_SYMBOL_GPL(ps3_irq_plug_setup); + +/** + * ps3_irq_plug_destroy - Generic outlet and virq related teardown. + * @virq: The assigned Linux virq. + * + * Disconnects the irq plug and tears down virq. + * Do not call for system bus event interrupts setup with + * ps3_sb_event_receive_port_setup(). + */ + +int ps3_irq_plug_destroy(unsigned int virq) +{ + int result; + const struct ps3_private *pd = irq_get_chip_data(virq); + + DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__, + __LINE__, pd->ppe_id, pd->thread_id, virq); + + ps3_chip_mask(irq_get_irq_data(virq)); + + result = lv1_disconnect_irq_plug_ext(pd->ppe_id, pd->thread_id, virq); + + if (result) + FAIL("%s:%d: lv1_disconnect_irq_plug_ext failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + ps3_virq_destroy(virq); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_irq_plug_destroy); + +/** + * ps3_event_receive_port_setup - Setup an event receive port. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @virq: The assigned Linux virq. + * + * The virq can be used with lv1_connect_interrupt_event_receive_port() to + * arrange to receive interrupts from system-bus devices, or with + * ps3_send_event_locally() to signal events. + */ + +int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq) +{ + int result; + u64 outlet; + + result = lv1_construct_event_receive_port(&outlet); + + if (result) { + FAIL("%s:%d: lv1_construct_event_receive_port failed: %s\n", + __func__, __LINE__, ps3_result(result)); + *virq = 0; + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_event_receive_port_setup); + +/** + * ps3_event_receive_port_destroy - Destroy an event receive port. + * @virq: The assigned Linux virq. + * + * Since ps3_event_receive_port_destroy destroys the receive port outlet, + * SB devices need to call disconnect_interrupt_event_receive_port() before + * this. + */ + +int ps3_event_receive_port_destroy(unsigned int virq) +{ + int result; + + DBG(" -> %s:%d virq %u\n", __func__, __LINE__, virq); + + ps3_chip_mask(irq_get_irq_data(virq)); + + result = lv1_destruct_event_receive_port(virq_to_hw(virq)); + + if (result) + FAIL("%s:%d: lv1_destruct_event_receive_port failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + /* + * Don't call ps3_virq_destroy() here since ps3_smp_cleanup_cpu() + * calls from interrupt context (smp_call_function) when kexecing. + */ + + DBG(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +int ps3_send_event_locally(unsigned int virq) +{ + return lv1_send_event_locally(virq_to_hw(virq)); +} + +/** + * ps3_sb_event_receive_port_setup - Setup a system bus event receive port. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @dev: The system bus device instance. + * @virq: The assigned Linux virq. + * + * An event irq represents a virtual device interrupt. The interrupt_id + * coresponds to the software interrupt number. + */ + +int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev, + enum ps3_cpu_binding cpu, unsigned int *virq) +{ + /* this should go in system-bus.c */ + + int result; + + result = ps3_event_receive_port_setup(cpu, virq); + + if (result) + return result; + + result = lv1_connect_interrupt_event_receive_port(dev->bus_id, + dev->dev_id, virq_to_hw(*virq), dev->interrupt_id); + + if (result) { + FAIL("%s:%d: lv1_connect_interrupt_event_receive_port" + " failed: %s\n", __func__, __LINE__, + ps3_result(result)); + ps3_event_receive_port_destroy(*virq); + *virq = 0; + return result; + } + + DBG("%s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__, + dev->interrupt_id, *virq); + + return 0; +} +EXPORT_SYMBOL(ps3_sb_event_receive_port_setup); + +int ps3_sb_event_receive_port_destroy(struct ps3_system_bus_device *dev, + unsigned int virq) +{ + /* this should go in system-bus.c */ + + int result; + + DBG(" -> %s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__, + dev->interrupt_id, virq); + + result = lv1_disconnect_interrupt_event_receive_port(dev->bus_id, + dev->dev_id, virq_to_hw(virq), dev->interrupt_id); + + if (result) + FAIL("%s:%d: lv1_disconnect_interrupt_event_receive_port" + " failed: %s\n", __func__, __LINE__, + ps3_result(result)); + + result = ps3_event_receive_port_destroy(virq); + BUG_ON(result); + + /* + * ps3_event_receive_port_destroy() destroys the IRQ plug, + * so don't call ps3_irq_plug_destroy() here. + */ + + result = ps3_virq_destroy(virq); + BUG_ON(result); + + DBG(" <- %s:%d\n", __func__, __LINE__); + return result; +} +EXPORT_SYMBOL(ps3_sb_event_receive_port_destroy); + +/** + * ps3_io_irq_setup - Setup a system bus io irq. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @interrupt_id: The device interrupt id read from the system repository. + * @virq: The assigned Linux virq. + * + * An io irq represents a non-virtualized device interrupt. interrupt_id + * coresponds to the interrupt number of the interrupt controller. + */ + +int ps3_io_irq_setup(enum ps3_cpu_binding cpu, unsigned int interrupt_id, + unsigned int *virq) +{ + int result; + u64 outlet; + + result = lv1_construct_io_irq_outlet(interrupt_id, &outlet); + + if (result) { + FAIL("%s:%d: lv1_construct_io_irq_outlet failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_io_irq_setup); + +int ps3_io_irq_destroy(unsigned int virq) +{ + int result; + unsigned long outlet = virq_to_hw(virq); + + ps3_chip_mask(irq_get_irq_data(virq)); + + /* + * lv1_destruct_io_irq_outlet() will destroy the IRQ plug, + * so call ps3_irq_plug_destroy() first. + */ + + result = ps3_irq_plug_destroy(virq); + BUG_ON(result); + + result = lv1_destruct_io_irq_outlet(outlet); + + if (result) + FAIL("%s:%d: lv1_destruct_io_irq_outlet failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_io_irq_destroy); + +/** + * ps3_vuart_irq_setup - Setup the system virtual uart virq. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @virt_addr_bmp: The caller supplied virtual uart interrupt bitmap. + * @virq: The assigned Linux virq. + * + * The system supports only a single virtual uart, so multiple calls without + * freeing the interrupt will return a wrong state error. + */ + +int ps3_vuart_irq_setup(enum ps3_cpu_binding cpu, void* virt_addr_bmp, + unsigned int *virq) +{ + int result; + u64 outlet; + u64 lpar_addr; + + BUG_ON(!is_kernel_addr((u64)virt_addr_bmp)); + + lpar_addr = ps3_mm_phys_to_lpar(__pa(virt_addr_bmp)); + + result = lv1_configure_virtual_uart_irq(lpar_addr, &outlet); + + if (result) { + FAIL("%s:%d: lv1_configure_virtual_uart_irq failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_vuart_irq_setup); + +int ps3_vuart_irq_destroy(unsigned int virq) +{ + int result; + + ps3_chip_mask(irq_get_irq_data(virq)); + result = lv1_deconfigure_virtual_uart_irq(); + + if (result) { + FAIL("%s:%d: lv1_configure_virtual_uart_irq failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_destroy(virq); + BUG_ON(result); + + return result; +} +EXPORT_SYMBOL_GPL(ps3_vuart_irq_destroy); + +/** + * ps3_spe_irq_setup - Setup an spe virq. + * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be + * serviced on. + * @spe_id: The spe_id returned from lv1_construct_logical_spe(). + * @class: The spe interrupt class {0,1,2}. + * @virq: The assigned Linux virq. + * + */ + +int ps3_spe_irq_setup(enum ps3_cpu_binding cpu, unsigned long spe_id, + unsigned int class, unsigned int *virq) +{ + int result; + u64 outlet; + + BUG_ON(class > 2); + + result = lv1_get_spe_irq_outlet(spe_id, class, &outlet); + + if (result) { + FAIL("%s:%d: lv1_get_spe_irq_outlet failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + result = ps3_irq_plug_setup(cpu, outlet, virq); + BUG_ON(result); + + return result; +} + +int ps3_spe_irq_destroy(unsigned int virq) +{ + int result; + + ps3_chip_mask(irq_get_irq_data(virq)); + + result = ps3_irq_plug_destroy(virq); + BUG_ON(result); + + return result; +} + + +#define PS3_INVALID_OUTLET ((irq_hw_number_t)-1) +#define PS3_PLUG_MAX 63 + +#if defined(DEBUG) +static void _dump_64_bmp(const char *header, const u64 *p, unsigned cpu, + const char* func, int line) +{ + pr_debug("%s:%d: %s %u {%04llx_%04llx_%04llx_%04llx}\n", + func, line, header, cpu, + *p >> 48, (*p >> 32) & 0xffff, (*p >> 16) & 0xffff, + *p & 0xffff); +} + +static void __maybe_unused _dump_256_bmp(const char *header, + const u64 *p, unsigned cpu, const char* func, int line) +{ + pr_debug("%s:%d: %s %u {%016llx:%016llx:%016llx:%016llx}\n", + func, line, header, cpu, p[0], p[1], p[2], p[3]); +} + +#define dump_bmp(_x) _dump_bmp(_x, __func__, __LINE__) +static void _dump_bmp(struct ps3_private* pd, const char* func, int line) +{ + unsigned long flags; + + spin_lock_irqsave(&pd->bmp_lock, flags); + _dump_64_bmp("stat", &pd->bmp.status, pd->thread_id, func, line); + _dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line); + spin_unlock_irqrestore(&pd->bmp_lock, flags); +} + +#define dump_mask(_x) _dump_mask(_x, __func__, __LINE__) +static void __maybe_unused _dump_mask(struct ps3_private *pd, + const char* func, int line) +{ + unsigned long flags; + + spin_lock_irqsave(&pd->bmp_lock, flags); + _dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line); + spin_unlock_irqrestore(&pd->bmp_lock, flags); +} +#else +static void dump_bmp(struct ps3_private* pd) {}; +#endif /* defined(DEBUG) */ + +static int ps3_host_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hwirq) +{ + DBG("%s:%d: hwirq %lu, virq %u\n", __func__, __LINE__, hwirq, + virq); + + irq_set_chip_and_handler(virq, &ps3_irq_chip, handle_fasteoi_irq); + + return 0; +} + +static int ps3_host_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) +{ + /* Match all */ + return 1; +} + +static const struct irq_domain_ops ps3_host_ops = { + .map = ps3_host_map, + .match = ps3_host_match, +}; + +void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq) +{ + struct ps3_private *pd = &per_cpu(ps3_private, cpu); + + set_bit(63 - virq, &pd->ipi_debug_brk_mask); + + DBG("%s:%d: cpu %u, virq %u, mask %lxh\n", __func__, __LINE__, + cpu, virq, pd->ipi_debug_brk_mask); +} + +void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) +{ + struct ps3_private *pd = &per_cpu(ps3_private, cpu); + + set_bit(63 - virq, &pd->ipi_mask); + + DBG("%s:%d: cpu %u, virq %u, ipi_mask %lxh\n", __func__, __LINE__, + cpu, virq, pd->ipi_mask); +} + +static unsigned int ps3_get_irq(void) +{ + struct ps3_private *pd = this_cpu_ptr(&ps3_private); + u64 x = (pd->bmp.status & pd->bmp.mask); + unsigned int plug; + + /* check for ipi break first to stop this cpu ASAP */ + + if (x & pd->ipi_debug_brk_mask) + x &= pd->ipi_debug_brk_mask; + + asm volatile("cntlzd %0,%1" : "=r" (plug) : "r" (x)); + plug &= 0x3f; + + if (unlikely(!plug)) { + DBG("%s:%d: no plug found: thread_id %llu\n", __func__, + __LINE__, pd->thread_id); + dump_bmp(&per_cpu(ps3_private, 0)); + dump_bmp(&per_cpu(ps3_private, 1)); + return 0; + } + +#if defined(DEBUG) + if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) { + dump_bmp(&per_cpu(ps3_private, 0)); + dump_bmp(&per_cpu(ps3_private, 1)); + BUG(); + } +#endif + + /* IPIs are EOIed here. */ + + if (test_bit(63 - plug, &pd->ipi_mask)) + lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, plug); + + return plug; +} + +void __init ps3_init_IRQ(void) +{ + int result; + unsigned cpu; + struct irq_domain *host; + + host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL); + irq_set_default_host(host); + + for_each_possible_cpu(cpu) { + struct ps3_private *pd = &per_cpu(ps3_private, cpu); + + lv1_get_logical_ppe_id(&pd->ppe_id); + pd->thread_id = get_hard_smp_processor_id(cpu); + spin_lock_init(&pd->bmp_lock); + + DBG("%s:%d: ppe_id %llu, thread_id %llu, bmp %lxh\n", + __func__, __LINE__, pd->ppe_id, pd->thread_id, + ps3_mm_phys_to_lpar(__pa(&pd->bmp))); + + result = lv1_configure_irq_state_bitmap(pd->ppe_id, + pd->thread_id, ps3_mm_phys_to_lpar(__pa(&pd->bmp))); + + if (result) + FAIL("%s:%d: lv1_configure_irq_state_bitmap failed:" + " %s\n", __func__, __LINE__, + ps3_result(result)); + } + + ppc_md.get_irq = ps3_get_irq; +} + +void ps3_shutdown_IRQ(int cpu) +{ + int result; + u64 ppe_id; + u64 thread_id = get_hard_smp_processor_id(cpu); + + lv1_get_logical_ppe_id(&ppe_id); + result = lv1_configure_irq_state_bitmap(ppe_id, thread_id, 0); + + DBG("%s:%d: lv1_configure_irq_state_bitmap (%llu:%llu/%d) %s\n", __func__, + __LINE__, ppe_id, thread_id, cpu, ps3_result(result)); +} diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c new file mode 100644 index 0000000000..1326de55fd --- /dev/null +++ b/arch/powerpc/platforms/ps3/mm.c @@ -0,0 +1,1254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 address space management. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#else +#define DBG pr_devel +#endif + +enum { +#if defined(CONFIG_PS3_DYNAMIC_DMA) + USE_DYNAMIC_DMA = 1, +#else + USE_DYNAMIC_DMA = 0, +#endif +}; + +enum { + PAGE_SHIFT_4K = 12U, + PAGE_SHIFT_64K = 16U, + PAGE_SHIFT_16M = 24U, +}; + +static unsigned long __init make_page_sizes(unsigned long a, unsigned long b) +{ + return (a << 56) | (b << 48); +} + +enum { + ALLOCATE_MEMORY_TRY_ALT_UNIT = 0X04, + ALLOCATE_MEMORY_ADDR_ZERO = 0X08, +}; + +/* valid htab sizes are {18,19,20} = 256K, 512K, 1M */ + +enum { + HTAB_SIZE_MAX = 20U, /* HV limit of 1MB */ + HTAB_SIZE_MIN = 18U, /* CPU limit of 256KB */ +}; + +/*============================================================================*/ +/* virtual address space routines */ +/*============================================================================*/ + +/** + * struct mem_region - memory region structure + * @base: base address + * @size: size in bytes + * @offset: difference between base and rm.size + * @destroy: flag if region should be destroyed upon shutdown + */ + +struct mem_region { + u64 base; + u64 size; + unsigned long offset; + int destroy; +}; + +/** + * struct map - address space state variables holder + * @total: total memory available as reported by HV + * @vas_id - HV virtual address space id + * @htab_size: htab size in bytes + * + * The HV virtual address space (vas) allows for hotplug memory regions. + * Memory regions can be created and destroyed in the vas at runtime. + * @rm: real mode (bootmem) region + * @r1: highmem region(s) + * + * ps3 addresses + * virt_addr: a cpu 'translated' effective address + * phys_addr: an address in what Linux thinks is the physical address space + * lpar_addr: an address in the HV virtual address space + * bus_addr: an io controller 'translated' address on a device bus + */ + +struct map { + u64 total; + u64 vas_id; + u64 htab_size; + struct mem_region rm; + struct mem_region r1; +}; + +#define debug_dump_map(x) _debug_dump_map(x, __func__, __LINE__) +static void __maybe_unused _debug_dump_map(const struct map *m, + const char *func, int line) +{ + DBG("%s:%d: map.total = %llxh\n", func, line, m->total); + DBG("%s:%d: map.rm.size = %llxh\n", func, line, m->rm.size); + DBG("%s:%d: map.vas_id = %llu\n", func, line, m->vas_id); + DBG("%s:%d: map.htab_size = %llxh\n", func, line, m->htab_size); + DBG("%s:%d: map.r1.base = %llxh\n", func, line, m->r1.base); + DBG("%s:%d: map.r1.offset = %lxh\n", func, line, m->r1.offset); + DBG("%s:%d: map.r1.size = %llxh\n", func, line, m->r1.size); +} + +static struct map map; + +/** + * ps3_mm_phys_to_lpar - translate a linux physical address to lpar address + * @phys_addr: linux physical address + */ + +unsigned long ps3_mm_phys_to_lpar(unsigned long phys_addr) +{ + BUG_ON(is_kernel_addr(phys_addr)); + return (phys_addr < map.rm.size || phys_addr >= map.total) + ? phys_addr : phys_addr + map.r1.offset; +} + +EXPORT_SYMBOL(ps3_mm_phys_to_lpar); + +/** + * ps3_mm_vas_create - create the virtual address space + */ + +void __init ps3_mm_vas_create(unsigned long* htab_size) +{ + int result; + u64 start_address; + u64 size; + u64 access_right; + u64 max_page_size; + u64 flags; + + result = lv1_query_logical_partition_address_region_info(0, + &start_address, &size, &access_right, &max_page_size, + &flags); + + if (result) { + DBG("%s:%d: lv1_query_logical_partition_address_region_info " + "failed: %s\n", __func__, __LINE__, + ps3_result(result)); + goto fail; + } + + if (max_page_size < PAGE_SHIFT_16M) { + DBG("%s:%d: bad max_page_size %llxh\n", __func__, __LINE__, + max_page_size); + goto fail; + } + + BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE > HTAB_SIZE_MAX); + BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE < HTAB_SIZE_MIN); + + result = lv1_construct_virtual_address_space(CONFIG_PS3_HTAB_SIZE, + 2, make_page_sizes(PAGE_SHIFT_16M, PAGE_SHIFT_64K), + &map.vas_id, &map.htab_size); + + if (result) { + DBG("%s:%d: lv1_construct_virtual_address_space failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail; + } + + result = lv1_select_virtual_address_space(map.vas_id); + + if (result) { + DBG("%s:%d: lv1_select_virtual_address_space failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail; + } + + *htab_size = map.htab_size; + + debug_dump_map(&map); + + return; + +fail: + panic("ps3_mm_vas_create failed"); +} + +/** + * ps3_mm_vas_destroy - + * + * called during kexec sequence with MMU off. + */ + +notrace void ps3_mm_vas_destroy(void) +{ + int result; + + if (map.vas_id) { + result = lv1_select_virtual_address_space(0); + result += lv1_destruct_virtual_address_space(map.vas_id); + + if (result) { + lv1_panic(0); + } + + map.vas_id = 0; + } +} + +static int __init ps3_mm_get_repository_highmem(struct mem_region *r) +{ + int result; + + /* Assume a single highmem region. */ + + result = ps3_repository_read_highmem_info(0, &r->base, &r->size); + + if (result) + goto zero_region; + + if (!r->base || !r->size) { + result = -1; + goto zero_region; + } + + r->offset = r->base - map.rm.size; + + DBG("%s:%d: Found high region in repository: %llxh %llxh\n", + __func__, __LINE__, r->base, r->size); + + return 0; + +zero_region: + DBG("%s:%d: No high region in repository.\n", __func__, __LINE__); + + r->size = r->base = r->offset = 0; + return result; +} + +static int ps3_mm_set_repository_highmem(const struct mem_region *r) +{ + /* Assume a single highmem region. */ + + return r ? ps3_repository_write_highmem_info(0, r->base, r->size) : + ps3_repository_write_highmem_info(0, 0, 0); +} + +/** + * ps3_mm_region_create - create a memory region in the vas + * @r: pointer to a struct mem_region to accept initialized values + * @size: requested region size + * + * This implementation creates the region with the vas large page size. + * @size is rounded down to a multiple of the vas large page size. + */ + +static int ps3_mm_region_create(struct mem_region *r, unsigned long size) +{ + int result; + u64 muid; + + r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); + + DBG("%s:%d requested %lxh\n", __func__, __LINE__, size); + DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size); + DBG("%s:%d difference %llxh (%lluMB)\n", __func__, __LINE__, + size - r->size, (size - r->size) / 1024 / 1024); + + if (r->size == 0) { + DBG("%s:%d: size == 0\n", __func__, __LINE__); + result = -1; + goto zero_region; + } + + result = lv1_allocate_memory(r->size, PAGE_SHIFT_16M, 0, + ALLOCATE_MEMORY_TRY_ALT_UNIT, &r->base, &muid); + + if (result || r->base < map.rm.size) { + DBG("%s:%d: lv1_allocate_memory failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto zero_region; + } + + r->destroy = 1; + r->offset = r->base - map.rm.size; + return result; + +zero_region: + r->size = r->base = r->offset = 0; + return result; +} + +/** + * ps3_mm_region_destroy - destroy a memory region + * @r: pointer to struct mem_region + */ + +static void ps3_mm_region_destroy(struct mem_region *r) +{ + int result; + + if (!r->destroy) { + return; + } + + if (r->base) { + result = lv1_release_memory(r->base); + + if (result) { + lv1_panic(0); + } + + r->size = r->base = r->offset = 0; + map.total = map.rm.size; + } + + ps3_mm_set_repository_highmem(NULL); +} + +/*============================================================================*/ +/* dma routines */ +/*============================================================================*/ + +/** + * dma_sb_lpar_to_bus - Translate an lpar address to ioc mapped bus address. + * @r: pointer to dma region structure + * @lpar_addr: HV lpar address + */ + +static unsigned long dma_sb_lpar_to_bus(struct ps3_dma_region *r, + unsigned long lpar_addr) +{ + if (lpar_addr >= map.rm.size) + lpar_addr -= map.r1.offset; + BUG_ON(lpar_addr < r->offset); + BUG_ON(lpar_addr >= r->offset + r->len); + return r->bus_addr + lpar_addr - r->offset; +} + +#define dma_dump_region(_a) _dma_dump_region(_a, __func__, __LINE__) +static void __maybe_unused _dma_dump_region(const struct ps3_dma_region *r, + const char *func, int line) +{ + DBG("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id, + r->dev->dev_id); + DBG("%s:%d: page_size %u\n", func, line, r->page_size); + DBG("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr); + DBG("%s:%d: len %lxh\n", func, line, r->len); + DBG("%s:%d: offset %lxh\n", func, line, r->offset); +} + + /** + * dma_chunk - A chunk of dma pages mapped by the io controller. + * @region - The dma region that owns this chunk. + * @lpar_addr: Starting lpar address of the area to map. + * @bus_addr: Starting ioc bus address of the area to map. + * @len: Length in bytes of the area to map. + * @link: A struct list_head used with struct ps3_dma_region.chunk_list, the + * list of all chunks owned by the region. + * + * This implementation uses a very simple dma page manager + * based on the dma_chunk structure. This scheme assumes + * that all drivers use very well behaved dma ops. + */ + +struct dma_chunk { + struct ps3_dma_region *region; + unsigned long lpar_addr; + unsigned long bus_addr; + unsigned long len; + struct list_head link; + unsigned int usage_count; +}; + +#define dma_dump_chunk(_a) _dma_dump_chunk(_a, __func__, __LINE__) +static void _dma_dump_chunk (const struct dma_chunk* c, const char* func, + int line) +{ + DBG("%s:%d: r.dev %llu:%llu\n", func, line, + c->region->dev->bus_id, c->region->dev->dev_id); + DBG("%s:%d: r.bus_addr %lxh\n", func, line, c->region->bus_addr); + DBG("%s:%d: r.page_size %u\n", func, line, c->region->page_size); + DBG("%s:%d: r.len %lxh\n", func, line, c->region->len); + DBG("%s:%d: r.offset %lxh\n", func, line, c->region->offset); + DBG("%s:%d: c.lpar_addr %lxh\n", func, line, c->lpar_addr); + DBG("%s:%d: c.bus_addr %lxh\n", func, line, c->bus_addr); + DBG("%s:%d: c.len %lxh\n", func, line, c->len); +} + +static struct dma_chunk * dma_find_chunk(struct ps3_dma_region *r, + unsigned long bus_addr, unsigned long len) +{ + struct dma_chunk *c; + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len+bus_addr-aligned_bus, + 1 << r->page_size); + + list_for_each_entry(c, &r->chunk_list.head, link) { + /* intersection */ + if (aligned_bus >= c->bus_addr && + aligned_bus + aligned_len <= c->bus_addr + c->len) + return c; + + /* below */ + if (aligned_bus + aligned_len <= c->bus_addr) + continue; + + /* above */ + if (aligned_bus >= c->bus_addr + c->len) + continue; + + /* we don't handle the multi-chunk case for now */ + dma_dump_chunk(c); + BUG(); + } + return NULL; +} + +static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r, + unsigned long lpar_addr, unsigned long len) +{ + struct dma_chunk *c; + unsigned long aligned_lpar = ALIGN_DOWN(lpar_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + lpar_addr - aligned_lpar, + 1 << r->page_size); + + list_for_each_entry(c, &r->chunk_list.head, link) { + /* intersection */ + if (c->lpar_addr <= aligned_lpar && + aligned_lpar < c->lpar_addr + c->len) { + if (aligned_lpar + aligned_len <= c->lpar_addr + c->len) + return c; + else { + dma_dump_chunk(c); + BUG(); + } + } + /* below */ + if (aligned_lpar + aligned_len <= c->lpar_addr) { + continue; + } + /* above */ + if (c->lpar_addr + c->len <= aligned_lpar) { + continue; + } + } + return NULL; +} + +static int dma_sb_free_chunk(struct dma_chunk *c) +{ + int result = 0; + + if (c->bus_addr) { + result = lv1_unmap_device_dma_region(c->region->dev->bus_id, + c->region->dev->dev_id, c->bus_addr, c->len); + BUG_ON(result); + } + + kfree(c); + return result; +} + +static int dma_ioc0_free_chunk(struct dma_chunk *c) +{ + int result = 0; + int iopage; + unsigned long offset; + struct ps3_dma_region *r = c->region; + + DBG("%s:start\n", __func__); + for (iopage = 0; iopage < (c->len >> r->page_size); iopage++) { + offset = (1 << r->page_size) * iopage; + /* put INVALID entry */ + result = lv1_put_iopte(0, + c->bus_addr + offset, + c->lpar_addr + offset, + r->ioid, + 0); + DBG("%s: bus=%#lx, lpar=%#lx, ioid=%d\n", __func__, + c->bus_addr + offset, + c->lpar_addr + offset, + r->ioid); + + if (result) { + DBG("%s:%d: lv1_put_iopte failed: %s\n", __func__, + __LINE__, ps3_result(result)); + } + } + kfree(c); + DBG("%s:end\n", __func__); + return result; +} + +/** + * dma_sb_map_pages - Maps dma pages into the io controller bus address space. + * @r: Pointer to a struct ps3_dma_region. + * @phys_addr: Starting physical address of the area to map. + * @len: Length in bytes of the area to map. + * c_out: A pointer to receive an allocated struct dma_chunk for this area. + * + * This is the lowest level dma mapping routine, and is the one that will + * make the HV call to add the pages into the io controller address space. + */ + +static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, + unsigned long len, struct dma_chunk **c_out, u64 iopte_flag) +{ + int result; + struct dma_chunk *c; + + c = kzalloc(sizeof(*c), GFP_ATOMIC); + if (!c) { + result = -ENOMEM; + goto fail_alloc; + } + + c->region = r; + c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr); + c->bus_addr = dma_sb_lpar_to_bus(r, c->lpar_addr); + c->len = len; + + BUG_ON(iopte_flag != 0xf800000000000000UL); + result = lv1_map_device_dma_region(c->region->dev->bus_id, + c->region->dev->dev_id, c->lpar_addr, + c->bus_addr, c->len, iopte_flag); + if (result) { + DBG("%s:%d: lv1_map_device_dma_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail_map; + } + + list_add(&c->link, &r->chunk_list.head); + + *c_out = c; + return 0; + +fail_map: + kfree(c); +fail_alloc: + *c_out = NULL; + DBG(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, + unsigned long len, struct dma_chunk **c_out, + u64 iopte_flag) +{ + int result; + struct dma_chunk *c, *last; + int iopage, pages; + unsigned long offset; + + DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__, + phys_addr, ps3_mm_phys_to_lpar(phys_addr), len); + c = kzalloc(sizeof(*c), GFP_ATOMIC); + if (!c) { + result = -ENOMEM; + goto fail_alloc; + } + + c->region = r; + c->len = len; + c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr); + /* allocate IO address */ + if (list_empty(&r->chunk_list.head)) { + /* first one */ + c->bus_addr = r->bus_addr; + } else { + /* derive from last bus addr*/ + last = list_entry(r->chunk_list.head.next, + struct dma_chunk, link); + c->bus_addr = last->bus_addr + last->len; + DBG("%s: last bus=%#lx, len=%#lx\n", __func__, + last->bus_addr, last->len); + } + + /* FIXME: check whether length exceeds region size */ + + /* build ioptes for the area */ + pages = len >> r->page_size; + DBG("%s: pgsize=%#x len=%#lx pages=%#x iopteflag=%#llx\n", __func__, + r->page_size, r->len, pages, iopte_flag); + for (iopage = 0; iopage < pages; iopage++) { + offset = (1 << r->page_size) * iopage; + result = lv1_put_iopte(0, + c->bus_addr + offset, + c->lpar_addr + offset, + r->ioid, + iopte_flag); + if (result) { + pr_warn("%s:%d: lv1_put_iopte failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail_map; + } + DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__, + iopage, c->bus_addr + offset, c->lpar_addr + offset, + r->ioid); + } + + /* be sure that last allocated one is inserted at head */ + list_add(&c->link, &r->chunk_list.head); + + *c_out = c; + DBG("%s: end\n", __func__); + return 0; + +fail_map: + for (iopage--; 0 <= iopage; iopage--) { + lv1_put_iopte(0, + c->bus_addr + offset, + c->lpar_addr + offset, + r->ioid, + 0); + } + kfree(c); +fail_alloc: + *c_out = NULL; + return result; +} + +/** + * dma_sb_region_create - Create a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * + * This is the lowest level dma region create routine, and is the one that + * will make the HV call to create the region. + */ + +static int dma_sb_region_create(struct ps3_dma_region *r) +{ + int result; + u64 bus_addr; + + DBG(" -> %s:%d:\n", __func__, __LINE__); + + BUG_ON(!r); + + if (!r->dev->bus_id) { + pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__, + r->dev->bus_id, r->dev->dev_id); + return 0; + } + + DBG("%s:%u: len = 0x%lx, page_size = %u, offset = 0x%lx\n", __func__, + __LINE__, r->len, r->page_size, r->offset); + + BUG_ON(!r->len); + BUG_ON(!r->page_size); + BUG_ON(!r->region_ops); + + INIT_LIST_HEAD(&r->chunk_list.head); + spin_lock_init(&r->chunk_list.lock); + + result = lv1_allocate_device_dma_region(r->dev->bus_id, r->dev->dev_id, + roundup_pow_of_two(r->len), r->page_size, r->region_type, + &bus_addr); + r->bus_addr = bus_addr; + + if (result) { + DBG("%s:%d: lv1_allocate_device_dma_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + r->len = r->bus_addr = 0; + } + + return result; +} + +static int dma_ioc0_region_create(struct ps3_dma_region *r) +{ + int result; + u64 bus_addr; + + INIT_LIST_HEAD(&r->chunk_list.head); + spin_lock_init(&r->chunk_list.lock); + + result = lv1_allocate_io_segment(0, + r->len, + r->page_size, + &bus_addr); + r->bus_addr = bus_addr; + if (result) { + DBG("%s:%d: lv1_allocate_io_segment failed: %s\n", + __func__, __LINE__, ps3_result(result)); + r->len = r->bus_addr = 0; + } + DBG("%s: len=%#lx, pg=%d, bus=%#lx\n", __func__, + r->len, r->page_size, r->bus_addr); + return result; +} + +/** + * dma_region_free - Free a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * + * This is the lowest level dma region free routine, and is the one that + * will make the HV call to free the region. + */ + +static int dma_sb_region_free(struct ps3_dma_region *r) +{ + int result; + struct dma_chunk *c; + struct dma_chunk *tmp; + + BUG_ON(!r); + + if (!r->dev->bus_id) { + pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__, + r->dev->bus_id, r->dev->dev_id); + return 0; + } + + list_for_each_entry_safe(c, tmp, &r->chunk_list.head, link) { + list_del(&c->link); + dma_sb_free_chunk(c); + } + + result = lv1_free_device_dma_region(r->dev->bus_id, r->dev->dev_id, + r->bus_addr); + + if (result) + DBG("%s:%d: lv1_free_device_dma_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + r->bus_addr = 0; + + return result; +} + +static int dma_ioc0_region_free(struct ps3_dma_region *r) +{ + int result; + struct dma_chunk *c, *n; + + DBG("%s: start\n", __func__); + list_for_each_entry_safe(c, n, &r->chunk_list.head, link) { + list_del(&c->link); + dma_ioc0_free_chunk(c); + } + + result = lv1_release_io_segment(0, r->bus_addr); + + if (result) + DBG("%s:%d: lv1_free_device_dma_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + r->bus_addr = 0; + DBG("%s: end\n", __func__); + + return result; +} + +/** + * dma_sb_map_area - Map an area of memory into a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * @virt_addr: Starting virtual address of the area to map. + * @len: Length in bytes of the area to map. + * @bus_addr: A pointer to return the starting ioc bus address of the area to + * map. + * + * This is the common dma mapping routine. + */ + +static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, + unsigned long len, dma_addr_t *bus_addr, + u64 iopte_flag) +{ + int result; + unsigned long flags; + struct dma_chunk *c; + unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) + : virt_addr; + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, + 1 << r->page_size); + *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr)); + + if (!USE_DYNAMIC_DMA) { + unsigned long lpar_addr = ps3_mm_phys_to_lpar(phys_addr); + DBG(" -> %s:%d\n", __func__, __LINE__); + DBG("%s:%d virt_addr %lxh\n", __func__, __LINE__, + virt_addr); + DBG("%s:%d phys_addr %lxh\n", __func__, __LINE__, + phys_addr); + DBG("%s:%d lpar_addr %lxh\n", __func__, __LINE__, + lpar_addr); + DBG("%s:%d len %lxh\n", __func__, __LINE__, len); + DBG("%s:%d bus_addr %llxh (%lxh)\n", __func__, __LINE__, + *bus_addr, len); + } + + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk(r, *bus_addr, len); + + if (c) { + DBG("%s:%d: reusing mapped chunk", __func__, __LINE__); + dma_dump_chunk(c); + c->usage_count++; + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return 0; + } + + result = dma_sb_map_pages(r, aligned_phys, aligned_len, &c, iopte_flag); + + if (result) { + *bus_addr = 0; + DBG("%s:%d: dma_sb_map_pages failed (%d)\n", + __func__, __LINE__, result); + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return result; + } + + c->usage_count = 1; + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return result; +} + +static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, + unsigned long len, dma_addr_t *bus_addr, + u64 iopte_flag) +{ + int result; + unsigned long flags; + struct dma_chunk *c; + unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) + : virt_addr; + unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys, + 1 << r->page_size); + + DBG(KERN_ERR "%s: vaddr=%#lx, len=%#lx\n", __func__, + virt_addr, len); + DBG(KERN_ERR "%s: ph=%#lx a_ph=%#lx a_l=%#lx\n", __func__, + phys_addr, aligned_phys, aligned_len); + + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk_lpar(r, ps3_mm_phys_to_lpar(phys_addr), len); + + if (c) { + /* FIXME */ + BUG(); + *bus_addr = c->bus_addr + phys_addr - aligned_phys; + c->usage_count++; + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return 0; + } + + result = dma_ioc0_map_pages(r, aligned_phys, aligned_len, &c, + iopte_flag); + + if (result) { + *bus_addr = 0; + DBG("%s:%d: dma_ioc0_map_pages failed (%d)\n", + __func__, __LINE__, result); + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return result; + } + *bus_addr = c->bus_addr + phys_addr - aligned_phys; + DBG("%s: va=%#lx pa=%#lx a_pa=%#lx bus=%#llx\n", __func__, + virt_addr, phys_addr, aligned_phys, *bus_addr); + c->usage_count = 1; + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return result; +} + +/** + * dma_sb_unmap_area - Unmap an area of memory from a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * @bus_addr: The starting ioc bus address of the area to unmap. + * @len: Length in bytes of the area to unmap. + * + * This is the common dma unmap routine. + */ + +static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr, + unsigned long len) +{ + unsigned long flags; + struct dma_chunk *c; + + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk(r, bus_addr, len); + + if (!c) { + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, + 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + bus_addr + - aligned_bus, 1 << r->page_size); + DBG("%s:%d: not found: bus_addr %llxh\n", + __func__, __LINE__, bus_addr); + DBG("%s:%d: not found: len %lxh\n", + __func__, __LINE__, len); + DBG("%s:%d: not found: aligned_bus %lxh\n", + __func__, __LINE__, aligned_bus); + DBG("%s:%d: not found: aligned_len %lxh\n", + __func__, __LINE__, aligned_len); + BUG(); + } + + c->usage_count--; + + if (!c->usage_count) { + list_del(&c->link); + dma_sb_free_chunk(c); + } + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + return 0; +} + +static int dma_ioc0_unmap_area(struct ps3_dma_region *r, + dma_addr_t bus_addr, unsigned long len) +{ + unsigned long flags; + struct dma_chunk *c; + + DBG("%s: start a=%#llx l=%#lx\n", __func__, bus_addr, len); + spin_lock_irqsave(&r->chunk_list.lock, flags); + c = dma_find_chunk(r, bus_addr, len); + + if (!c) { + unsigned long aligned_bus = ALIGN_DOWN(bus_addr, + 1 << r->page_size); + unsigned long aligned_len = ALIGN(len + bus_addr + - aligned_bus, + 1 << r->page_size); + DBG("%s:%d: not found: bus_addr %llxh\n", + __func__, __LINE__, bus_addr); + DBG("%s:%d: not found: len %lxh\n", + __func__, __LINE__, len); + DBG("%s:%d: not found: aligned_bus %lxh\n", + __func__, __LINE__, aligned_bus); + DBG("%s:%d: not found: aligned_len %lxh\n", + __func__, __LINE__, aligned_len); + BUG(); + } + + c->usage_count--; + + if (!c->usage_count) { + list_del(&c->link); + dma_ioc0_free_chunk(c); + } + + spin_unlock_irqrestore(&r->chunk_list.lock, flags); + DBG("%s: end\n", __func__); + return 0; +} + +/** + * dma_sb_region_create_linear - Setup a linear dma mapping for a device. + * @r: Pointer to a struct ps3_dma_region. + * + * This routine creates an HV dma region for the device and maps all available + * ram into the io controller bus address space. + */ + +static int dma_sb_region_create_linear(struct ps3_dma_region *r) +{ + int result; + unsigned long virt_addr, len; + dma_addr_t tmp; + + if (r->len > 16*1024*1024) { /* FIXME: need proper fix */ + /* force 16M dma pages for linear mapping */ + if (r->page_size != PS3_DMA_16M) { + pr_info("%s:%d: forcing 16M pages for linear map\n", + __func__, __LINE__); + r->page_size = PS3_DMA_16M; + r->len = ALIGN(r->len, 1 << r->page_size); + } + } + + result = dma_sb_region_create(r); + BUG_ON(result); + + if (r->offset < map.rm.size) { + /* Map (part of) 1st RAM chunk */ + virt_addr = map.rm.base + r->offset; + len = map.rm.size - r->offset; + if (len > r->len) + len = r->len; + result = dma_sb_map_area(r, virt_addr, len, &tmp, + CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW | + CBE_IOPTE_M); + BUG_ON(result); + } + + if (r->offset + r->len > map.rm.size) { + /* Map (part of) 2nd RAM chunk */ + virt_addr = map.rm.size; + len = r->len; + if (r->offset >= map.rm.size) + virt_addr += r->offset - map.rm.size; + else + len -= map.rm.size - r->offset; + result = dma_sb_map_area(r, virt_addr, len, &tmp, + CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW | + CBE_IOPTE_M); + BUG_ON(result); + } + + return result; +} + +/** + * dma_sb_region_free_linear - Free a linear dma mapping for a device. + * @r: Pointer to a struct ps3_dma_region. + * + * This routine will unmap all mapped areas and free the HV dma region. + */ + +static int dma_sb_region_free_linear(struct ps3_dma_region *r) +{ + int result; + dma_addr_t bus_addr; + unsigned long len, lpar_addr; + + if (r->offset < map.rm.size) { + /* Unmap (part of) 1st RAM chunk */ + lpar_addr = map.rm.base + r->offset; + len = map.rm.size - r->offset; + if (len > r->len) + len = r->len; + bus_addr = dma_sb_lpar_to_bus(r, lpar_addr); + result = dma_sb_unmap_area(r, bus_addr, len); + BUG_ON(result); + } + + if (r->offset + r->len > map.rm.size) { + /* Unmap (part of) 2nd RAM chunk */ + lpar_addr = map.r1.base; + len = r->len; + if (r->offset >= map.rm.size) + lpar_addr += r->offset - map.rm.size; + else + len -= map.rm.size - r->offset; + bus_addr = dma_sb_lpar_to_bus(r, lpar_addr); + result = dma_sb_unmap_area(r, bus_addr, len); + BUG_ON(result); + } + + result = dma_sb_region_free(r); + BUG_ON(result); + + return result; +} + +/** + * dma_sb_map_area_linear - Map an area of memory into a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * @virt_addr: Starting virtual address of the area to map. + * @len: Length in bytes of the area to map. + * @bus_addr: A pointer to return the starting ioc bus address of the area to + * map. + * + * This routine just returns the corresponding bus address. Actual mapping + * occurs in dma_region_create_linear(). + */ + +static int dma_sb_map_area_linear(struct ps3_dma_region *r, + unsigned long virt_addr, unsigned long len, dma_addr_t *bus_addr, + u64 iopte_flag) +{ + unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) + : virt_addr; + *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr)); + return 0; +} + +/** + * dma_unmap_area_linear - Unmap an area of memory from a device dma region. + * @r: Pointer to a struct ps3_dma_region. + * @bus_addr: The starting ioc bus address of the area to unmap. + * @len: Length in bytes of the area to unmap. + * + * This routine does nothing. Unmapping occurs in dma_sb_region_free_linear(). + */ + +static int dma_sb_unmap_area_linear(struct ps3_dma_region *r, + dma_addr_t bus_addr, unsigned long len) +{ + return 0; +}; + +static const struct ps3_dma_region_ops ps3_dma_sb_region_ops = { + .create = dma_sb_region_create, + .free = dma_sb_region_free, + .map = dma_sb_map_area, + .unmap = dma_sb_unmap_area +}; + +static const struct ps3_dma_region_ops ps3_dma_sb_region_linear_ops = { + .create = dma_sb_region_create_linear, + .free = dma_sb_region_free_linear, + .map = dma_sb_map_area_linear, + .unmap = dma_sb_unmap_area_linear +}; + +static const struct ps3_dma_region_ops ps3_dma_ioc0_region_ops = { + .create = dma_ioc0_region_create, + .free = dma_ioc0_region_free, + .map = dma_ioc0_map_area, + .unmap = dma_ioc0_unmap_area +}; + +int ps3_dma_region_init(struct ps3_system_bus_device *dev, + struct ps3_dma_region *r, enum ps3_dma_page_size page_size, + enum ps3_dma_region_type region_type, void *addr, unsigned long len) +{ + unsigned long lpar_addr; + int result; + + lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; + + r->dev = dev; + r->page_size = page_size; + r->region_type = region_type; + r->offset = lpar_addr; + if (r->offset >= map.rm.size) + r->offset -= map.r1.offset; + r->len = len ? len : ALIGN(map.total, 1 << r->page_size); + + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + + switch (dev->dev_type) { + case PS3_DEVICE_TYPE_SB: + r->region_ops = (USE_DYNAMIC_DMA) + ? &ps3_dma_sb_region_ops + : &ps3_dma_sb_region_linear_ops; + break; + case PS3_DEVICE_TYPE_IOC0: + r->region_ops = &ps3_dma_ioc0_region_ops; + break; + default: + BUG(); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(ps3_dma_region_init); + +int ps3_dma_region_create(struct ps3_dma_region *r) +{ + BUG_ON(!r); + BUG_ON(!r->region_ops); + BUG_ON(!r->region_ops->create); + return r->region_ops->create(r); +} +EXPORT_SYMBOL(ps3_dma_region_create); + +int ps3_dma_region_free(struct ps3_dma_region *r) +{ + BUG_ON(!r); + BUG_ON(!r->region_ops); + BUG_ON(!r->region_ops->free); + return r->region_ops->free(r); +} +EXPORT_SYMBOL(ps3_dma_region_free); + +int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr, + unsigned long len, dma_addr_t *bus_addr, + u64 iopte_flag) +{ + return r->region_ops->map(r, virt_addr, len, bus_addr, iopte_flag); +} + +int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr, + unsigned long len) +{ + return r->region_ops->unmap(r, bus_addr, len); +} + +/*============================================================================*/ +/* system startup routines */ +/*============================================================================*/ + +/** + * ps3_mm_init - initialize the address space state variables + */ + +void __init ps3_mm_init(void) +{ + int result; + + DBG(" -> %s:%d\n", __func__, __LINE__); + + result = ps3_repository_read_mm_info(&map.rm.base, &map.rm.size, + &map.total); + + if (result) + panic("ps3_repository_read_mm_info() failed"); + + map.rm.offset = map.rm.base; + map.vas_id = map.htab_size = 0; + + /* this implementation assumes map.rm.base is zero */ + + BUG_ON(map.rm.base); + BUG_ON(!map.rm.size); + + /* Check if we got the highmem region from an earlier boot step */ + + if (ps3_mm_get_repository_highmem(&map.r1)) { + result = ps3_mm_region_create(&map.r1, map.total - map.rm.size); + + if (!result) + ps3_mm_set_repository_highmem(&map.r1); + } + + /* correct map.total for the real total amount of memory we use */ + map.total = map.rm.size + map.r1.size; + + if (!map.r1.size) { + DBG("%s:%d: No highmem region found\n", __func__, __LINE__); + } else { + DBG("%s:%d: Adding highmem region: %llxh %llxh\n", + __func__, __LINE__, map.rm.size, + map.total - map.rm.size); + memblock_add(map.rm.size, map.total - map.rm.size); + } + + DBG(" <- %s:%d\n", __func__, __LINE__); +} + +/** + * ps3_mm_shutdown - final cleanup of address space + * + * called during kexec sequence with MMU off. + */ + +notrace void ps3_mm_shutdown(void) +{ + ps3_mm_region_destroy(&map.r1); +} diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c new file mode 100644 index 0000000000..b384cd2d6b --- /dev/null +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 flash memory os area. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "platform.h" + +enum { + OS_AREA_SEGMENT_SIZE = 0X200, +}; + +enum os_area_ldr_format { + HEADER_LDR_FORMAT_RAW = 0, + HEADER_LDR_FORMAT_GZIP = 1, +}; + +#define OS_AREA_HEADER_MAGIC_NUM "cell_ext_os_area" + +/** + * struct os_area_header - os area header segment. + * @magic_num: Always 'cell_ext_os_area'. + * @hdr_version: Header format version number. + * @db_area_offset: Starting segment number of other os database area. + * @ldr_area_offset: Starting segment number of bootloader image area. + * @ldr_format: HEADER_LDR_FORMAT flag. + * @ldr_size: Size of bootloader image in bytes. + * + * Note that the docs refer to area offsets. These are offsets in units of + * segments from the start of the os area (top of the header). These are + * better thought of as segment numbers. The os area of the os area is + * reserved for the os image. + */ + +struct os_area_header { + u8 magic_num[16]; + u32 hdr_version; + u32 db_area_offset; + u32 ldr_area_offset; + u32 _reserved_1; + u32 ldr_format; + u32 ldr_size; + u32 _reserved_2[6]; +}; + +enum os_area_boot_flag { + PARAM_BOOT_FLAG_GAME_OS = 0, + PARAM_BOOT_FLAG_OTHER_OS = 1, +}; + +enum os_area_ctrl_button { + PARAM_CTRL_BUTTON_O_IS_YES = 0, + PARAM_CTRL_BUTTON_X_IS_YES = 1, +}; + +/** + * struct os_area_params - os area params segment. + * @boot_flag: User preference of operating system, PARAM_BOOT_FLAG flag. + * @num_params: Number of params in this (params) segment. + * @rtc_diff: Difference in seconds between 1970 and the ps3 rtc value. + * @av_multi_out: User preference of AV output, PARAM_AV_MULTI_OUT flag. + * @ctrl_button: User preference of controller button config, PARAM_CTRL_BUTTON + * flag. + * @static_ip_addr: User preference of static IP address. + * @network_mask: User preference of static network mask. + * @default_gateway: User preference of static default gateway. + * @dns_primary: User preference of static primary dns server. + * @dns_secondary: User preference of static secondary dns server. + * + * The ps3 rtc maintains a read-only value that approximates seconds since + * 2000-01-01 00:00:00 UTC. + * + * User preference of zero for static_ip_addr means use dhcp. + */ + +struct os_area_params { + u32 boot_flag; + u32 _reserved_1[3]; + u32 num_params; + u32 _reserved_2[3]; + /* param 0 */ + s64 rtc_diff; + u8 av_multi_out; + u8 ctrl_button; + u8 _reserved_3[6]; + /* param 1 */ + u8 static_ip_addr[4]; + u8 network_mask[4]; + u8 default_gateway[4]; + u8 _reserved_4[4]; + /* param 2 */ + u8 dns_primary[4]; + u8 dns_secondary[4]; + u8 _reserved_5[8]; +}; + +#define OS_AREA_DB_MAGIC_NUM "-db-" + +/** + * struct os_area_db - Shared flash memory database. + * @magic_num: Always '-db-'. + * @version: os_area_db format version number. + * @index_64: byte offset of the database id index for 64 bit variables. + * @count_64: number of usable 64 bit index entries + * @index_32: byte offset of the database id index for 32 bit variables. + * @count_32: number of usable 32 bit index entries + * @index_16: byte offset of the database id index for 16 bit variables. + * @count_16: number of usable 16 bit index entries + * + * Flash rom storage for exclusive use by guests running in the other os lpar. + * The current system configuration allocates 1K (two segments) for other os + * use. + */ + +struct os_area_db { + u8 magic_num[4]; + u16 version; + u16 _reserved_1; + u16 index_64; + u16 count_64; + u16 index_32; + u16 count_32; + u16 index_16; + u16 count_16; + u32 _reserved_2; + u8 _db_data[1000]; +}; + +/** + * enum os_area_db_owner - Data owners. + */ + +enum os_area_db_owner { + OS_AREA_DB_OWNER_ANY = -1, + OS_AREA_DB_OWNER_NONE = 0, + OS_AREA_DB_OWNER_PROTOTYPE = 1, + OS_AREA_DB_OWNER_LINUX = 2, + OS_AREA_DB_OWNER_PETITBOOT = 3, + OS_AREA_DB_OWNER_MAX = 32, +}; + +enum os_area_db_key { + OS_AREA_DB_KEY_ANY = -1, + OS_AREA_DB_KEY_NONE = 0, + OS_AREA_DB_KEY_RTC_DIFF = 1, + OS_AREA_DB_KEY_VIDEO_MODE = 2, + OS_AREA_DB_KEY_MAX = 8, +}; + +struct os_area_db_id { + int owner; + int key; +}; + +static const struct os_area_db_id os_area_db_id_empty = { + .owner = OS_AREA_DB_OWNER_NONE, + .key = OS_AREA_DB_KEY_NONE +}; + +static const struct os_area_db_id os_area_db_id_any = { + .owner = OS_AREA_DB_OWNER_ANY, + .key = OS_AREA_DB_KEY_ANY +}; + +static const struct os_area_db_id os_area_db_id_rtc_diff = { + .owner = OS_AREA_DB_OWNER_LINUX, + .key = OS_AREA_DB_KEY_RTC_DIFF +}; + +#define SECONDS_FROM_1970_TO_2000 946684800LL + +/** + * struct saved_params - Static working copies of data from the PS3 'os area'. + * + * The order of preference we use for the rtc_diff source: + * 1) The database value. + * 2) The game os value. + * 3) The number of seconds from 1970 to 2000. + */ + +static struct saved_params { + unsigned int valid; + s64 rtc_diff; + unsigned int av_multi_out; +} saved_params; + +static struct property property_rtc_diff = { + .name = "linux,rtc_diff", + .length = sizeof(saved_params.rtc_diff), + .value = &saved_params.rtc_diff, +}; + +static struct property property_av_multi_out = { + .name = "linux,av_multi_out", + .length = sizeof(saved_params.av_multi_out), + .value = &saved_params.av_multi_out, +}; + + +static DEFINE_MUTEX(os_area_flash_mutex); + +static const struct ps3_os_area_flash_ops *os_area_flash_ops; + +void ps3_os_area_flash_register(const struct ps3_os_area_flash_ops *ops) +{ + mutex_lock(&os_area_flash_mutex); + os_area_flash_ops = ops; + mutex_unlock(&os_area_flash_mutex); +} +EXPORT_SYMBOL_GPL(ps3_os_area_flash_register); + +static ssize_t os_area_flash_read(void *buf, size_t count, loff_t pos) +{ + ssize_t res = -ENODEV; + + mutex_lock(&os_area_flash_mutex); + if (os_area_flash_ops) + res = os_area_flash_ops->read(buf, count, pos); + mutex_unlock(&os_area_flash_mutex); + + return res; +} + +static ssize_t os_area_flash_write(const void *buf, size_t count, loff_t pos) +{ + ssize_t res = -ENODEV; + + mutex_lock(&os_area_flash_mutex); + if (os_area_flash_ops) + res = os_area_flash_ops->write(buf, count, pos); + mutex_unlock(&os_area_flash_mutex); + + return res; +} + + +/** + * os_area_set_property - Add or overwrite a saved_params value to the device tree. + * + * Overwrites an existing property. + */ + +static void os_area_set_property(struct device_node *node, + struct property *prop) +{ + int result; + struct property *tmp = of_find_property(node, prop->name, NULL); + + if (tmp) { + pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name); + of_remove_property(node, tmp); + } + + result = of_add_property(node, prop); + + if (result) + pr_debug("%s:%d of_set_property failed\n", __func__, + __LINE__); +} + +/** + * os_area_get_property - Get a saved_params value from the device tree. + * + */ + +static void __init os_area_get_property(struct device_node *node, + struct property *prop) +{ + const struct property *tmp = of_find_property(node, prop->name, NULL); + + if (tmp) { + BUG_ON(prop->length != tmp->length); + memcpy(prop->value, tmp->value, prop->length); + } else + pr_debug("%s:%d not found %s\n", __func__, __LINE__, + prop->name); +} + +static void dump_field(char *s, const u8 *field, int size_of_field) +{ +#if defined(DEBUG) + int i; + + for (i = 0; i < size_of_field; i++) + s[i] = isprint(field[i]) ? field[i] : '.'; + s[i] = 0; +#endif +} + +#define dump_header(_a) _dump_header(_a, __func__, __LINE__) +static void _dump_header(const struct os_area_header *h, const char *func, + int line) +{ + char str[sizeof(h->magic_num) + 1]; + + dump_field(str, h->magic_num, sizeof(h->magic_num)); + pr_debug("%s:%d: h.magic_num: '%s'\n", func, line, + str); + pr_debug("%s:%d: h.hdr_version: %u\n", func, line, + h->hdr_version); + pr_debug("%s:%d: h.db_area_offset: %u\n", func, line, + h->db_area_offset); + pr_debug("%s:%d: h.ldr_area_offset: %u\n", func, line, + h->ldr_area_offset); + pr_debug("%s:%d: h.ldr_format: %u\n", func, line, + h->ldr_format); + pr_debug("%s:%d: h.ldr_size: %xh\n", func, line, + h->ldr_size); +} + +#define dump_params(_a) _dump_params(_a, __func__, __LINE__) +static void _dump_params(const struct os_area_params *p, const char *func, + int line) +{ + pr_debug("%s:%d: p.boot_flag: %u\n", func, line, p->boot_flag); + pr_debug("%s:%d: p.num_params: %u\n", func, line, p->num_params); + pr_debug("%s:%d: p.rtc_diff %lld\n", func, line, p->rtc_diff); + pr_debug("%s:%d: p.av_multi_out %u\n", func, line, p->av_multi_out); + pr_debug("%s:%d: p.ctrl_button: %u\n", func, line, p->ctrl_button); + pr_debug("%s:%d: p.static_ip_addr: %u.%u.%u.%u\n", func, line, + p->static_ip_addr[0], p->static_ip_addr[1], + p->static_ip_addr[2], p->static_ip_addr[3]); + pr_debug("%s:%d: p.network_mask: %u.%u.%u.%u\n", func, line, + p->network_mask[0], p->network_mask[1], + p->network_mask[2], p->network_mask[3]); + pr_debug("%s:%d: p.default_gateway: %u.%u.%u.%u\n", func, line, + p->default_gateway[0], p->default_gateway[1], + p->default_gateway[2], p->default_gateway[3]); + pr_debug("%s:%d: p.dns_primary: %u.%u.%u.%u\n", func, line, + p->dns_primary[0], p->dns_primary[1], + p->dns_primary[2], p->dns_primary[3]); + pr_debug("%s:%d: p.dns_secondary: %u.%u.%u.%u\n", func, line, + p->dns_secondary[0], p->dns_secondary[1], + p->dns_secondary[2], p->dns_secondary[3]); +} + +static int verify_header(const struct os_area_header *header) +{ + if (memcmp(header->magic_num, OS_AREA_HEADER_MAGIC_NUM, + sizeof(header->magic_num))) { + pr_debug("%s:%d magic_num failed\n", __func__, __LINE__); + return -1; + } + + if (header->hdr_version < 1) { + pr_debug("%s:%d hdr_version failed\n", __func__, __LINE__); + return -1; + } + + if (header->db_area_offset > header->ldr_area_offset) { + pr_debug("%s:%d offsets failed\n", __func__, __LINE__); + return -1; + } + + return 0; +} + +static int db_verify(const struct os_area_db *db) +{ + if (memcmp(db->magic_num, OS_AREA_DB_MAGIC_NUM, + sizeof(db->magic_num))) { + pr_debug("%s:%d magic_num failed\n", __func__, __LINE__); + return -EINVAL; + } + + if (db->version != 1) { + pr_debug("%s:%d version failed\n", __func__, __LINE__); + return -EINVAL; + } + + return 0; +} + +struct db_index { + uint8_t owner:5; + uint8_t key:3; +}; + +struct db_iterator { + const struct os_area_db *db; + struct os_area_db_id match_id; + struct db_index *idx; + struct db_index *last_idx; + union { + uint64_t *value_64; + uint32_t *value_32; + uint16_t *value_16; + }; +}; + +static unsigned int db_align_up(unsigned int val, unsigned int size) +{ + return (val + (size - 1)) & (~(size - 1)); +} + +/** + * db_for_each_64 - Iterator for 64 bit entries. + * + * A NULL value for id can be used to match all entries. + * OS_AREA_DB_OWNER_ANY and OS_AREA_DB_KEY_ANY can be used to match all. + */ + +static int db_for_each_64(const struct os_area_db *db, + const struct os_area_db_id *match_id, struct db_iterator *i) +{ +next: + if (!i->db) { + i->db = db; + i->match_id = match_id ? *match_id : os_area_db_id_any; + i->idx = (void *)db + db->index_64; + i->last_idx = i->idx + db->count_64; + i->value_64 = (void *)db + db->index_64 + + db_align_up(db->count_64, 8); + } else { + i->idx++; + i->value_64++; + } + + if (i->idx >= i->last_idx) { + pr_debug("%s:%d: reached end\n", __func__, __LINE__); + return 0; + } + + if (i->match_id.owner != OS_AREA_DB_OWNER_ANY + && i->match_id.owner != (int)i->idx->owner) + goto next; + if (i->match_id.key != OS_AREA_DB_KEY_ANY + && i->match_id.key != (int)i->idx->key) + goto next; + + return 1; +} + +static int db_delete_64(struct os_area_db *db, const struct os_area_db_id *id) +{ + struct db_iterator i; + + for (i.db = NULL; db_for_each_64(db, id, &i); ) { + + pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__, + i.idx->owner, i.idx->key, + (unsigned long long)*i.value_64); + + i.idx->owner = 0; + i.idx->key = 0; + *i.value_64 = 0; + } + return 0; +} + +static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id, + uint64_t value) +{ + struct db_iterator i; + + pr_debug("%s:%d: (%d:%d) <= %llxh\n", __func__, __LINE__, + id->owner, id->key, (unsigned long long)value); + + if (!id->owner || id->owner == OS_AREA_DB_OWNER_ANY + || id->key == OS_AREA_DB_KEY_ANY) { + pr_debug("%s:%d: bad id: (%d:%d)\n", __func__, + __LINE__, id->owner, id->key); + return -1; + } + + db_delete_64(db, id); + + i.db = NULL; + if (db_for_each_64(db, &os_area_db_id_empty, &i)) { + + pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__, + i.idx->owner, i.idx->key, + (unsigned long long)*i.value_64); + + i.idx->owner = id->owner; + i.idx->key = id->key; + *i.value_64 = value; + + pr_debug("%s:%d: set (%d:%d) <= %llxh\n", __func__, __LINE__, + i.idx->owner, i.idx->key, + (unsigned long long)*i.value_64); + return 0; + } + pr_debug("%s:%d: database full.\n", + __func__, __LINE__); + return -1; +} + +static int __init db_get_64(const struct os_area_db *db, + const struct os_area_db_id *id, uint64_t *value) +{ + struct db_iterator i; + + i.db = NULL; + if (db_for_each_64(db, id, &i)) { + *value = *i.value_64; + pr_debug("%s:%d: found %lld\n", __func__, __LINE__, + (long long int)*i.value_64); + return 0; + } + pr_debug("%s:%d: not found\n", __func__, __LINE__); + return -1; +} + +static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) +{ + return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff); +} + +#define dump_db(a) _dump_db(a, __func__, __LINE__) +static void _dump_db(const struct os_area_db *db, const char *func, + int line) +{ + char str[sizeof(db->magic_num) + 1]; + + dump_field(str, db->magic_num, sizeof(db->magic_num)); + pr_debug("%s:%d: db.magic_num: '%s'\n", func, line, + str); + pr_debug("%s:%d: db.version: %u\n", func, line, + db->version); + pr_debug("%s:%d: db.index_64: %u\n", func, line, + db->index_64); + pr_debug("%s:%d: db.count_64: %u\n", func, line, + db->count_64); + pr_debug("%s:%d: db.index_32: %u\n", func, line, + db->index_32); + pr_debug("%s:%d: db.count_32: %u\n", func, line, + db->count_32); + pr_debug("%s:%d: db.index_16: %u\n", func, line, + db->index_16); + pr_debug("%s:%d: db.count_16: %u\n", func, line, + db->count_16); +} + +static void os_area_db_init(struct os_area_db *db) +{ + enum { + HEADER_SIZE = offsetof(struct os_area_db, _db_data), + INDEX_64_COUNT = 64, + VALUES_64_COUNT = 57, + INDEX_32_COUNT = 64, + VALUES_32_COUNT = 57, + INDEX_16_COUNT = 64, + VALUES_16_COUNT = 57, + }; + + memset(db, 0, sizeof(struct os_area_db)); + + memcpy(db->magic_num, OS_AREA_DB_MAGIC_NUM, sizeof(db->magic_num)); + db->version = 1; + db->index_64 = HEADER_SIZE; + db->count_64 = VALUES_64_COUNT; + db->index_32 = HEADER_SIZE + + INDEX_64_COUNT * sizeof(struct db_index) + + VALUES_64_COUNT * sizeof(u64); + db->count_32 = VALUES_32_COUNT; + db->index_16 = HEADER_SIZE + + INDEX_64_COUNT * sizeof(struct db_index) + + VALUES_64_COUNT * sizeof(u64) + + INDEX_32_COUNT * sizeof(struct db_index) + + VALUES_32_COUNT * sizeof(u32); + db->count_16 = VALUES_16_COUNT; + + /* Rules to check db layout. */ + + BUILD_BUG_ON(sizeof(struct db_index) != 1); + BUILD_BUG_ON(sizeof(struct os_area_db) != 2 * OS_AREA_SEGMENT_SIZE); + BUILD_BUG_ON(INDEX_64_COUNT & 0x7); + BUILD_BUG_ON(VALUES_64_COUNT > INDEX_64_COUNT); + BUILD_BUG_ON(INDEX_32_COUNT & 0x7); + BUILD_BUG_ON(VALUES_32_COUNT > INDEX_32_COUNT); + BUILD_BUG_ON(INDEX_16_COUNT & 0x7); + BUILD_BUG_ON(VALUES_16_COUNT > INDEX_16_COUNT); + BUILD_BUG_ON(HEADER_SIZE + + INDEX_64_COUNT * sizeof(struct db_index) + + VALUES_64_COUNT * sizeof(u64) + + INDEX_32_COUNT * sizeof(struct db_index) + + VALUES_32_COUNT * sizeof(u32) + + INDEX_16_COUNT * sizeof(struct db_index) + + VALUES_16_COUNT * sizeof(u16) + > sizeof(struct os_area_db)); +} + +/** + * update_flash_db - Helper for os_area_queue_work_handler. + * + */ + +static int update_flash_db(void) +{ + const unsigned int buf_len = 8 * OS_AREA_SEGMENT_SIZE; + struct os_area_header *header; + ssize_t count; + int error; + loff_t pos; + struct os_area_db* db; + + /* Read in header and db from flash. */ + + header = kmalloc(buf_len, GFP_KERNEL); + if (!header) + return -ENOMEM; + + count = os_area_flash_read(header, buf_len, 0); + if (count < 0) { + pr_debug("%s: os_area_flash_read failed %zd\n", __func__, + count); + error = count; + goto fail; + } + + pos = header->db_area_offset * OS_AREA_SEGMENT_SIZE; + if (count < OS_AREA_SEGMENT_SIZE || verify_header(header) || + count < pos) { + pr_debug("%s: verify_header failed\n", __func__); + dump_header(header); + error = -EINVAL; + goto fail; + } + + /* Now got a good db offset and some maybe good db data. */ + + db = (void *)header + pos; + + error = db_verify(db); + if (error) { + pr_notice("%s: Verify of flash database failed, formatting.\n", + __func__); + dump_db(db); + os_area_db_init(db); + } + + /* Now got good db data. */ + + db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff); + + count = os_area_flash_write(db, sizeof(struct os_area_db), pos); + if (count < 0 || count < sizeof(struct os_area_db)) { + pr_debug("%s: os_area_flash_write failed %zd\n", __func__, + count); + error = count < 0 ? count : -EIO; + } + +fail: + kfree(header); + return error; +} + +/** + * os_area_queue_work_handler - Asynchronous write handler. + * + * An asynchronous write for flash memory and the device tree. Do not + * call directly, use os_area_queue_work(). + */ + +static void os_area_queue_work_handler(struct work_struct *work) +{ + struct device_node *node; + int error; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + node = of_find_node_by_path("/"); + if (node) { + os_area_set_property(node, &property_rtc_diff); + of_node_put(node); + } else + pr_debug("%s:%d of_find_node_by_path failed\n", + __func__, __LINE__); + + error = update_flash_db(); + if (error) + pr_warn("%s: Could not update FLASH ROM\n", __func__); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); +} + +static void os_area_queue_work(void) +{ + static DECLARE_WORK(q, os_area_queue_work_handler); + + wmb(); + schedule_work(&q); +} + +/** + * ps3_os_area_save_params - Copy data from os area mirror to @saved_params. + * + * For the convenience of the guest the HV makes a copy of the os area in + * flash to a high address in the boot memory region and then puts that RAM + * address and the byte count into the repository for retrieval by the guest. + * We copy the data we want into a static variable and allow the memory setup + * by the HV to be claimed by the memblock manager. + * + * The os area mirror will not be available to a second stage kernel, and + * the header verify will fail. In this case, the saved_params values will + * be set from flash memory or the passed in device tree in ps3_os_area_init(). + */ + +void __init ps3_os_area_save_params(void) +{ + int result; + u64 lpar_addr; + unsigned int size; + struct os_area_header *header; + struct os_area_params *params; + struct os_area_db *db; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + result = ps3_repository_read_boot_dat_info(&lpar_addr, &size); + + if (result) { + pr_debug("%s:%d ps3_repository_read_boot_dat_info failed\n", + __func__, __LINE__); + return; + } + + header = (struct os_area_header *)__va(lpar_addr); + params = (struct os_area_params *)__va(lpar_addr + + OS_AREA_SEGMENT_SIZE); + + result = verify_header(header); + + if (result) { + /* Second stage kernels exit here. */ + pr_debug("%s:%d verify_header failed\n", __func__, __LINE__); + dump_header(header); + return; + } + + db = (struct os_area_db *)__va(lpar_addr + + header->db_area_offset * OS_AREA_SEGMENT_SIZE); + + dump_header(header); + dump_params(params); + dump_db(db); + + result = db_verify(db) || db_get_rtc_diff(db, &saved_params.rtc_diff); + if (result) + saved_params.rtc_diff = params->rtc_diff ? params->rtc_diff + : SECONDS_FROM_1970_TO_2000; + saved_params.av_multi_out = params->av_multi_out; + saved_params.valid = 1; + + memset(header, 0, sizeof(*header)); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); +} + +/** + * ps3_os_area_init - Setup os area device tree properties as needed. + */ + +void __init ps3_os_area_init(void) +{ + struct device_node *node; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + node = of_find_node_by_path("/"); + + if (!saved_params.valid && node) { + /* Second stage kernels should have a dt entry. */ + os_area_get_property(node, &property_rtc_diff); + os_area_get_property(node, &property_av_multi_out); + } + + if(!saved_params.rtc_diff) + saved_params.rtc_diff = SECONDS_FROM_1970_TO_2000; + + if (node) { + os_area_set_property(node, &property_rtc_diff); + os_area_set_property(node, &property_av_multi_out); + of_node_put(node); + } else + pr_debug("%s:%d of_find_node_by_path failed\n", + __func__, __LINE__); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); +} + +/** + * ps3_os_area_get_rtc_diff - Returns the rtc diff value. + */ + +u64 ps3_os_area_get_rtc_diff(void) +{ + return saved_params.rtc_diff; +} +EXPORT_SYMBOL_GPL(ps3_os_area_get_rtc_diff); + +/** + * ps3_os_area_set_rtc_diff - Set the rtc diff value. + * + * An asynchronous write is needed to support writing updates from + * the timer interrupt context. + */ + +void ps3_os_area_set_rtc_diff(u64 rtc_diff) +{ + if (saved_params.rtc_diff != rtc_diff) { + saved_params.rtc_diff = rtc_diff; + os_area_queue_work(); + } +} +EXPORT_SYMBOL_GPL(ps3_os_area_set_rtc_diff); + +/** + * ps3_os_area_get_av_multi_out - Returns the default video mode. + */ + +enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void) +{ + return saved_params.av_multi_out; +} +EXPORT_SYMBOL_GPL(ps3_os_area_get_av_multi_out); diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h new file mode 100644 index 0000000000..6beecdb0d5 --- /dev/null +++ b/arch/powerpc/platforms/ps3/platform.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PS3 platform declarations. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#if !defined(_PS3_PLATFORM_H) +#define _PS3_PLATFORM_H + +#include +#include + +#include + +/* htab */ + +void __init ps3_hpte_init(unsigned long htab_size); +void __init ps3_map_htab(void); + +/* mm */ + +void __init ps3_mm_init(void); +void __init ps3_mm_vas_create(unsigned long* htab_size); +void ps3_mm_vas_destroy(void); +void ps3_mm_shutdown(void); + +/* irq */ + +void ps3_init_IRQ(void); +void ps3_shutdown_IRQ(int cpu); +void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq); +void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq); + +/* smp */ + +void __init smp_init_ps3(void); +#ifdef CONFIG_SMP +void ps3_smp_cleanup_cpu(int cpu); +#else +static inline void ps3_smp_cleanup_cpu(int cpu) { } +#endif + +/* time */ + +void __init ps3_calibrate_decr(void); +time64_t __init ps3_get_boot_time(void); +void ps3_get_rtc_time(struct rtc_time *time); +int ps3_set_rtc_time(struct rtc_time *time); + +/* os area */ + +void __init ps3_os_area_save_params(void); +void __init ps3_os_area_init(void); + +/* spu */ + +#if defined(CONFIG_SPU_BASE) +void ps3_spu_set_platform (void); +#else +static inline void ps3_spu_set_platform (void) {} +#endif + +/* repository bus info */ + +enum ps3_bus_type { + PS3_BUS_TYPE_SB = 4, + PS3_BUS_TYPE_STORAGE = 5, +}; + +enum ps3_dev_type { + PS3_DEV_TYPE_STOR_DISK = TYPE_DISK, /* 0 */ + PS3_DEV_TYPE_SB_GELIC = 3, + PS3_DEV_TYPE_SB_USB = 4, + PS3_DEV_TYPE_STOR_ROM = TYPE_ROM, /* 5 */ + PS3_DEV_TYPE_SB_GPIO = 6, + PS3_DEV_TYPE_STOR_FLASH = TYPE_RBC, /* 14 */ +}; + +int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str, + u64 *value); +int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id); +int ps3_repository_read_bus_type(unsigned int bus_index, + enum ps3_bus_type *bus_type); +int ps3_repository_read_bus_num_dev(unsigned int bus_index, + unsigned int *num_dev); + +/* repository bus device info */ + +enum ps3_interrupt_type { + PS3_INTERRUPT_TYPE_EVENT_PORT = 2, + PS3_INTERRUPT_TYPE_SB_OHCI = 3, + PS3_INTERRUPT_TYPE_SB_EHCI = 4, + PS3_INTERRUPT_TYPE_OTHER = 5, +}; + +enum ps3_reg_type { + PS3_REG_TYPE_SB_OHCI = 3, + PS3_REG_TYPE_SB_EHCI = 4, + PS3_REG_TYPE_SB_GPIO = 5, +}; + +int ps3_repository_read_dev_str(unsigned int bus_index, + unsigned int dev_index, const char *dev_str, u64 *value); +int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index, + u64 *dev_id); +int ps3_repository_read_dev_type(unsigned int bus_index, + unsigned int dev_index, enum ps3_dev_type *dev_type); +int ps3_repository_read_dev_intr(unsigned int bus_index, + unsigned int dev_index, unsigned int intr_index, + enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id); +int ps3_repository_read_dev_reg_type(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type); +int ps3_repository_read_dev_reg_addr(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, u64 *bus_addr, + u64 *len); +int ps3_repository_read_dev_reg(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len); + +/* repository bus enumerators */ + +struct ps3_repository_device { + unsigned int bus_index; + unsigned int dev_index; + enum ps3_bus_type bus_type; + enum ps3_dev_type dev_type; + u64 bus_id; + u64 dev_id; +}; + +int ps3_repository_find_device(struct ps3_repository_device *repo); +int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, + u64 bus_id, u64 dev_id); +int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, + int (*callback)(const struct ps3_repository_device *repo)); +int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, + unsigned int *bus_index); +int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, + enum ps3_interrupt_type intr_type, unsigned int *interrupt_id); +int ps3_repository_find_reg(const struct ps3_repository_device *repo, + enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len); + +/* repository block device info */ + +int ps3_repository_read_stor_dev_port(unsigned int bus_index, + unsigned int dev_index, u64 *port); +int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index, + unsigned int dev_index, u64 *blk_size); +int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index, + unsigned int dev_index, u64 *num_blocks); +int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index, + unsigned int dev_index, unsigned int *num_regions); +int ps3_repository_read_stor_dev_region_id(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id); +int ps3_repository_read_stor_dev_region_size(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_size); +int ps3_repository_read_stor_dev_region_start(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_start); +int ps3_repository_read_stor_dev_info(unsigned int bus_index, + unsigned int dev_index, u64 *port, u64 *blk_size, + u64 *num_blocks, unsigned int *num_regions); +int ps3_repository_read_stor_dev_region(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id, u64 *region_start, u64 *region_size); + +/* repository logical pu and memory info */ + +int ps3_repository_read_num_pu(u64 *num_pu); +int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id); +int ps3_repository_read_rm_base(unsigned int ppe_id, u64 *rm_base); +int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size); +int ps3_repository_read_region_total(u64 *region_total); +int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size, + u64 *region_total); +int ps3_repository_read_highmem_region_count(unsigned int *region_count); +int ps3_repository_read_highmem_base(unsigned int region_index, + u64 *highmem_base); +int ps3_repository_read_highmem_size(unsigned int region_index, + u64 *highmem_size); +int ps3_repository_read_highmem_info(unsigned int region_index, + u64 *highmem_base, u64 *highmem_size); + +#if defined (CONFIG_PS3_REPOSITORY_WRITE) +int ps3_repository_write_highmem_region_count(unsigned int region_count); +int ps3_repository_write_highmem_base(unsigned int region_index, + u64 highmem_base); +int ps3_repository_write_highmem_size(unsigned int region_index, + u64 highmem_size); +int ps3_repository_write_highmem_info(unsigned int region_index, + u64 highmem_base, u64 highmem_size); +int ps3_repository_delete_highmem_info(unsigned int region_index); +#else +static inline int ps3_repository_write_highmem_region_count( + unsigned int region_count) {return 0;} +static inline int ps3_repository_write_highmem_base(unsigned int region_index, + u64 highmem_base) {return 0;} +static inline int ps3_repository_write_highmem_size(unsigned int region_index, + u64 highmem_size) {return 0;} +static inline int ps3_repository_write_highmem_info(unsigned int region_index, + u64 highmem_base, u64 highmem_size) {return 0;} +static inline int ps3_repository_delete_highmem_info(unsigned int region_index) + {return 0;} +#endif + +/* repository pme info */ + +int ps3_repository_read_num_be(unsigned int *num_be); +int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id); +int ps3_repository_read_be_id(u64 node_id, u64 *be_id); +int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); +int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); + +/* repository performance monitor info */ + +int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar, + u64 *rights); + +/* repository 'Other OS' area */ + +int ps3_repository_read_boot_dat_addr(u64 *lpar_addr); +int ps3_repository_read_boot_dat_size(unsigned int *size); +int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size); + +/* repository spu info */ + +/** + * enum spu_resource_type - Type of spu resource. + * @spu_resource_type_shared: Logical spu is shared with other partions. + * @spu_resource_type_exclusive: Logical spu is not shared with other partions. + * + * Returned by ps3_repository_read_spu_resource_id(). + */ + +enum ps3_spu_resource_type { + PS3_SPU_RESOURCE_TYPE_SHARED = 0, + PS3_SPU_RESOURCE_TYPE_EXCLUSIVE = 0x8000000000000000UL, +}; + +int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved); +int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id); +int ps3_repository_read_spu_resource_id(unsigned int res_index, + enum ps3_spu_resource_type* resource_type, unsigned int *resource_id); + +/* repository vuart info */ + +int __init ps3_repository_read_vuart_av_port(unsigned int *port); +int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port); + +#endif diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c new file mode 100644 index 0000000000..1abe33fbe5 --- /dev/null +++ b/arch/powerpc/platforms/ps3/repository.c @@ -0,0 +1,1380 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 repository routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include + +#include "platform.h" + +enum ps3_vendor_id { + PS3_VENDOR_ID_NONE = 0, + PS3_VENDOR_ID_SONY = 0x8000000000000000UL, +}; + +enum ps3_lpar_id { + PS3_LPAR_ID_CURRENT = 0, + PS3_LPAR_ID_PME = 1, +}; + +#define dump_field(_a, _b) _dump_field(_a, _b, __func__, __LINE__) +static void _dump_field(const char *hdr, u64 n, const char *func, int line) +{ +#if defined(DEBUG) + char s[16]; + const char *const in = (const char *)&n; + unsigned int i; + + for (i = 0; i < 8; i++) + s[i] = (in[i] <= 126 && in[i] >= 32) ? in[i] : '.'; + s[i] = 0; + + pr_devel("%s:%d: %s%016llx : %s\n", func, line, hdr, n, s); +#endif +} + +#define dump_node_name(_a, _b, _c, _d, _e) \ + _dump_node_name(_a, _b, _c, _d, _e, __func__, __LINE__) +static void _dump_node_name(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, + u64 n4, const char *func, int line) +{ + pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id); + _dump_field("n1: ", n1, func, line); + _dump_field("n2: ", n2, func, line); + _dump_field("n3: ", n3, func, line); + _dump_field("n4: ", n4, func, line); +} + +#define dump_node(_a, _b, _c, _d, _e, _f, _g) \ + _dump_node(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__) +static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, + u64 v1, u64 v2, const char *func, int line) +{ + pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id); + _dump_field("n1: ", n1, func, line); + _dump_field("n2: ", n2, func, line); + _dump_field("n3: ", n3, func, line); + _dump_field("n4: ", n4, func, line); + pr_devel("%s:%d: v1: %016llx\n", func, line, v1); + pr_devel("%s:%d: v2: %016llx\n", func, line, v2); +} + +/** + * make_first_field - Make the first field of a repository node name. + * @text: Text portion of the field. + * @index: Numeric index portion of the field. Use zero for 'don't care'. + * + * This routine sets the vendor id to zero (non-vendor specific). + * Returns field value. + */ + +static u64 make_first_field(const char *text, u64 index) +{ + u64 n = 0; + + memcpy((char *)&n, text, strnlen(text, sizeof(n))); + return PS3_VENDOR_ID_NONE + (n >> 32) + index; +} + +/** + * make_field - Make subsequent fields of a repository node name. + * @text: Text portion of the field. Use "" for 'don't care'. + * @index: Numeric index portion of the field. Use zero for 'don't care'. + * + * Returns field value. + */ + +static u64 make_field(const char *text, u64 index) +{ + u64 n = 0; + + memcpy((char *)&n, text, strnlen(text, sizeof(n))); + return n + index; +} + +/** + * read_node - Read a repository node from raw fields. + * @n1: First field of node name. + * @n2: Second field of node name. Use zero for 'don't care'. + * @n3: Third field of node name. Use zero for 'don't care'. + * @n4: Fourth field of node name. Use zero for 'don't care'. + * @v1: First repository value (high word). + * @v2: Second repository value (low word). Optional parameter, use zero + * for 'don't care'. + */ + +static int read_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, + u64 *_v1, u64 *_v2) +{ + int result; + u64 v1; + u64 v2; + + if (lpar_id == PS3_LPAR_ID_CURRENT) { + u64 id; + lv1_get_logical_partition_id(&id); + lpar_id = id; + } + + result = lv1_read_repository_node(lpar_id, n1, n2, n3, n4, &v1, + &v2); + + if (result) { + pr_warn("%s:%d: lv1_read_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + dump_node_name(lpar_id, n1, n2, n3, n4); + return -ENOENT; + } + + dump_node(lpar_id, n1, n2, n3, n4, v1, v2); + + if (_v1) + *_v1 = v1; + if (_v2) + *_v2 = v2; + + if (v1 && !_v1) + pr_devel("%s:%d: warning: discarding non-zero v1: %016llx\n", + __func__, __LINE__, v1); + if (v2 && !_v2) + pr_devel("%s:%d: warning: discarding non-zero v2: %016llx\n", + __func__, __LINE__, v2); + + return 0; +} + +int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str, + u64 *value) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field(bus_str, 0), + 0, 0, + value, NULL); +} + +int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id) +{ + return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index), + make_field("id", 0), 0, 0, bus_id, NULL); +} + +int ps3_repository_read_bus_type(unsigned int bus_index, + enum ps3_bus_type *bus_type) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("type", 0), + 0, 0, + &v1, NULL); + *bus_type = v1; + return result; +} + +int ps3_repository_read_bus_num_dev(unsigned int bus_index, + unsigned int *num_dev) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("num_dev", 0), + 0, 0, + &v1, NULL); + *num_dev = v1; + return result; +} + +int ps3_repository_read_dev_str(unsigned int bus_index, + unsigned int dev_index, const char *dev_str, u64 *value) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field(dev_str, 0), + 0, + value, NULL); +} + +int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index, + u64 *dev_id) +{ + return read_node(PS3_LPAR_ID_PME, make_first_field("bus", bus_index), + make_field("dev", dev_index), make_field("id", 0), 0, + dev_id, NULL); +} + +int ps3_repository_read_dev_type(unsigned int bus_index, + unsigned int dev_index, enum ps3_dev_type *dev_type) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("type", 0), + 0, + &v1, NULL); + *dev_type = v1; + return result; +} + +int ps3_repository_read_dev_intr(unsigned int bus_index, + unsigned int dev_index, unsigned int intr_index, + enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id) +{ + int result; + u64 v1 = 0; + u64 v2 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("intr", intr_index), + 0, + &v1, &v2); + *intr_type = v1; + *interrupt_id = v2; + return result; +} + +int ps3_repository_read_dev_reg_type(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("reg", reg_index), + make_field("type", 0), + &v1, NULL); + *reg_type = v1; + return result; +} + +int ps3_repository_read_dev_reg_addr(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, u64 *bus_addr, u64 *len) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("reg", reg_index), + make_field("data", 0), + bus_addr, len); +} + +int ps3_repository_read_dev_reg(unsigned int bus_index, + unsigned int dev_index, unsigned int reg_index, + enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len) +{ + int result = ps3_repository_read_dev_reg_type(bus_index, dev_index, + reg_index, reg_type); + return result ? result + : ps3_repository_read_dev_reg_addr(bus_index, dev_index, + reg_index, bus_addr, len); +} + + + +int ps3_repository_find_device(struct ps3_repository_device *repo) +{ + int result; + struct ps3_repository_device tmp = *repo; + unsigned int num_dev; + + BUG_ON(repo->bus_index > 10); + BUG_ON(repo->dev_index > 10); + + result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev); + + if (result) { + pr_devel("%s:%d read_bus_num_dev failed\n", __func__, __LINE__); + return result; + } + + pr_devel("%s:%d: bus_type %u, bus_index %u, bus_id %llu, num_dev %u\n", + __func__, __LINE__, tmp.bus_type, tmp.bus_index, tmp.bus_id, + num_dev); + + if (tmp.dev_index >= num_dev) { + pr_devel("%s:%d: no device found\n", __func__, __LINE__); + return -ENODEV; + } + + result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index, + &tmp.dev_type); + + if (result) { + pr_devel("%s:%d read_dev_type failed\n", __func__, __LINE__); + return result; + } + + result = ps3_repository_read_dev_id(tmp.bus_index, tmp.dev_index, + &tmp.dev_id); + + if (result) { + pr_devel("%s:%d ps3_repository_read_dev_id failed\n", __func__, + __LINE__); + return result; + } + + pr_devel("%s:%d: found: dev_type %u, dev_index %u, dev_id %llu\n", + __func__, __LINE__, tmp.dev_type, tmp.dev_index, tmp.dev_id); + + *repo = tmp; + return 0; +} + +int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, + u64 bus_id, u64 dev_id) +{ + int result = -ENODEV; + struct ps3_repository_device tmp; + unsigned int num_dev; + + pr_devel(" -> %s:%u: find device by id %llu:%llu\n", __func__, __LINE__, + bus_id, dev_id); + + for (tmp.bus_index = 0; tmp.bus_index < 10; tmp.bus_index++) { + result = ps3_repository_read_bus_id(tmp.bus_index, + &tmp.bus_id); + if (result) { + pr_devel("%s:%u read_bus_id(%u) failed\n", __func__, + __LINE__, tmp.bus_index); + return result; + } + + if (tmp.bus_id == bus_id) + goto found_bus; + + pr_devel("%s:%u: skip, bus_id %llu\n", __func__, __LINE__, + tmp.bus_id); + } + pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__); + return result; + +found_bus: + result = ps3_repository_read_bus_type(tmp.bus_index, &tmp.bus_type); + if (result) { + pr_devel("%s:%u read_bus_type(%u) failed\n", __func__, + __LINE__, tmp.bus_index); + return result; + } + + result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev); + if (result) { + pr_devel("%s:%u read_bus_num_dev failed\n", __func__, + __LINE__); + return result; + } + + for (tmp.dev_index = 0; tmp.dev_index < num_dev; tmp.dev_index++) { + result = ps3_repository_read_dev_id(tmp.bus_index, + tmp.dev_index, + &tmp.dev_id); + if (result) { + pr_devel("%s:%u read_dev_id(%u:%u) failed\n", __func__, + __LINE__, tmp.bus_index, tmp.dev_index); + return result; + } + + if (tmp.dev_id == dev_id) + goto found_dev; + + pr_devel("%s:%u: skip, dev_id %llu\n", __func__, __LINE__, + tmp.dev_id); + } + pr_devel(" <- %s:%u: dev not found\n", __func__, __LINE__); + return result; + +found_dev: + result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index, + &tmp.dev_type); + if (result) { + pr_devel("%s:%u read_dev_type failed\n", __func__, __LINE__); + return result; + } + + pr_devel(" <- %s:%u: found: type (%u:%u) index (%u:%u) id (%llu:%llu)\n", + __func__, __LINE__, tmp.bus_type, tmp.dev_type, tmp.bus_index, + tmp.dev_index, tmp.bus_id, tmp.dev_id); + *repo = tmp; + return 0; +} + +int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, + int (*callback)(const struct ps3_repository_device *repo)) +{ + int result = 0; + struct ps3_repository_device repo; + + pr_devel(" -> %s:%d: find bus_type %u\n", __func__, __LINE__, bus_type); + + repo.bus_type = bus_type; + result = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index); + if (result) { + pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__); + return result; + } + + result = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id); + if (result) { + pr_devel("%s:%d read_bus_id(%u) failed\n", __func__, __LINE__, + repo.bus_index); + return result; + } + + for (repo.dev_index = 0; ; repo.dev_index++) { + result = ps3_repository_find_device(&repo); + if (result == -ENODEV) { + result = 0; + break; + } else if (result) + break; + + result = callback(&repo); + if (result) { + pr_devel("%s:%d: abort at callback\n", __func__, + __LINE__); + break; + } + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, + unsigned int *bus_index) +{ + unsigned int i; + enum ps3_bus_type type; + int error; + + for (i = from; i < 10; i++) { + error = ps3_repository_read_bus_type(i, &type); + if (error) { + pr_devel("%s:%d read_bus_type failed\n", + __func__, __LINE__); + *bus_index = UINT_MAX; + return error; + } + if (type == bus_type) { + *bus_index = i; + return 0; + } + } + *bus_index = UINT_MAX; + return -ENODEV; +} + +int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, + enum ps3_interrupt_type intr_type, unsigned int *interrupt_id) +{ + int result = 0; + unsigned int res_index; + + pr_devel("%s:%d: find intr_type %u\n", __func__, __LINE__, intr_type); + + *interrupt_id = UINT_MAX; + + for (res_index = 0; res_index < 10; res_index++) { + enum ps3_interrupt_type t; + unsigned int id; + + result = ps3_repository_read_dev_intr(repo->bus_index, + repo->dev_index, res_index, &t, &id); + + if (result) { + pr_devel("%s:%d read_dev_intr failed\n", + __func__, __LINE__); + return result; + } + + if (t == intr_type) { + *interrupt_id = id; + break; + } + } + + if (res_index == 10) + return -ENODEV; + + pr_devel("%s:%d: found intr_type %u at res_index %u\n", + __func__, __LINE__, intr_type, res_index); + + return result; +} + +int ps3_repository_find_reg(const struct ps3_repository_device *repo, + enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len) +{ + int result = 0; + unsigned int res_index; + + pr_devel("%s:%d: find reg_type %u\n", __func__, __LINE__, reg_type); + + *bus_addr = *len = 0; + + for (res_index = 0; res_index < 10; res_index++) { + enum ps3_reg_type t; + u64 a; + u64 l; + + result = ps3_repository_read_dev_reg(repo->bus_index, + repo->dev_index, res_index, &t, &a, &l); + + if (result) { + pr_devel("%s:%d read_dev_reg failed\n", + __func__, __LINE__); + return result; + } + + if (t == reg_type) { + *bus_addr = a; + *len = l; + break; + } + } + + if (res_index == 10) + return -ENODEV; + + pr_devel("%s:%d: found reg_type %u at res_index %u\n", + __func__, __LINE__, reg_type, res_index); + + return result; +} + +int ps3_repository_read_stor_dev_port(unsigned int bus_index, + unsigned int dev_index, u64 *port) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("port", 0), + 0, port, NULL); +} + +int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index, + unsigned int dev_index, u64 *blk_size) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("blk_size", 0), + 0, blk_size, NULL); +} + +int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index, + unsigned int dev_index, u64 *num_blocks) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("n_blocks", 0), + 0, num_blocks, NULL); +} + +int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index, + unsigned int dev_index, unsigned int *num_regions) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("n_regs", 0), + 0, &v1, NULL); + *num_regions = v1; + return result; +} + +int ps3_repository_read_stor_dev_region_id(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("region", region_index), + make_field("id", 0), + &v1, NULL); + *region_id = v1; + return result; +} + +int ps3_repository_read_stor_dev_region_size(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_size) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("region", region_index), + make_field("size", 0), + region_size, NULL); +} + +int ps3_repository_read_stor_dev_region_start(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, u64 *region_start) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("bus", bus_index), + make_field("dev", dev_index), + make_field("region", region_index), + make_field("start", 0), + region_start, NULL); +} + +int ps3_repository_read_stor_dev_info(unsigned int bus_index, + unsigned int dev_index, u64 *port, u64 *blk_size, + u64 *num_blocks, unsigned int *num_regions) +{ + int result; + + result = ps3_repository_read_stor_dev_port(bus_index, dev_index, port); + if (result) + return result; + + result = ps3_repository_read_stor_dev_blk_size(bus_index, dev_index, + blk_size); + if (result) + return result; + + result = ps3_repository_read_stor_dev_num_blocks(bus_index, dev_index, + num_blocks); + if (result) + return result; + + result = ps3_repository_read_stor_dev_num_regions(bus_index, dev_index, + num_regions); + return result; +} + +int ps3_repository_read_stor_dev_region(unsigned int bus_index, + unsigned int dev_index, unsigned int region_index, + unsigned int *region_id, u64 *region_start, u64 *region_size) +{ + int result; + + result = ps3_repository_read_stor_dev_region_id(bus_index, dev_index, + region_index, region_id); + if (result) + return result; + + result = ps3_repository_read_stor_dev_region_start(bus_index, dev_index, + region_index, region_start); + if (result) + return result; + + result = ps3_repository_read_stor_dev_region_size(bus_index, dev_index, + region_index, region_size); + return result; +} + +/** + * ps3_repository_read_num_pu - Number of logical PU processors for this lpar. + */ + +int ps3_repository_read_num_pu(u64 *num_pu) +{ + *num_pu = 0; + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("pun", 0), + 0, 0, + num_pu, NULL); +} + +/** + * ps3_repository_read_pu_id - Read the logical PU id. + * @pu_index: Zero based index. + * @pu_id: The logical PU id. + */ + +int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("pu", pu_index), + 0, 0, + pu_id, NULL); +} + +int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("pu", 0), + ppe_id, + make_field("rm_size", 0), + rm_size, NULL); +} + +int ps3_repository_read_region_total(u64 *region_total) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("rgntotal", 0), + 0, 0, + region_total, NULL); +} + +/** + * ps3_repository_read_mm_info - Read mm info for single pu system. + * @rm_base: Real mode memory base address. + * @rm_size: Real mode memory size. + * @region_total: Maximum memory region size. + */ + +int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size, u64 *region_total) +{ + int result; + u64 ppe_id; + + lv1_get_logical_ppe_id(&ppe_id); + *rm_base = 0; + result = ps3_repository_read_rm_size(ppe_id, rm_size); + return result ? result + : ps3_repository_read_region_total(region_total); +} + +/** + * ps3_repository_read_highmem_region_count - Read the number of highmem regions + * + * Bootloaders must arrange the repository nodes such that regions are indexed + * with a region_index from 0 to region_count-1. + */ + +int ps3_repository_read_highmem_region_count(unsigned int *region_count) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("highmem", 0), + make_field("region", 0), + make_field("count", 0), + 0, + &v1, NULL); + *region_count = v1; + return result; +} + + +int ps3_repository_read_highmem_base(unsigned int region_index, + u64 *highmem_base) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("base", 0), + 0, + highmem_base, NULL); +} + +int ps3_repository_read_highmem_size(unsigned int region_index, + u64 *highmem_size) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("size", 0), + 0, + highmem_size, NULL); +} + +/** + * ps3_repository_read_highmem_info - Read high memory region info + * @region_index: Region index, {0,..,region_count-1}. + * @highmem_base: High memory base address. + * @highmem_size: High memory size. + * + * Bootloaders that preallocate highmem regions must place the + * region info into the repository at these well known nodes. + */ + +int ps3_repository_read_highmem_info(unsigned int region_index, + u64 *highmem_base, u64 *highmem_size) +{ + int result; + + *highmem_base = 0; + result = ps3_repository_read_highmem_base(region_index, highmem_base); + return result ? result + : ps3_repository_read_highmem_size(region_index, highmem_size); +} + +/** + * ps3_repository_read_num_spu_reserved - Number of physical spus reserved. + * @num_spu: Number of physical spus. + */ + +int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("spun", 0), + 0, 0, + &v1, NULL); + *num_spu_reserved = v1; + return result; +} + +/** + * ps3_repository_read_num_spu_resource_id - Number of spu resource reservations. + * @num_resource_id: Number of spu resource ids. + */ + +int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("spursvn", 0), + 0, 0, + &v1, NULL); + *num_resource_id = v1; + return result; +} + +/** + * ps3_repository_read_spu_resource_id - spu resource reservation id value. + * @res_index: Resource reservation index. + * @resource_type: Resource reservation type. + * @resource_id: Resource reservation id. + */ + +int ps3_repository_read_spu_resource_id(unsigned int res_index, + enum ps3_spu_resource_type *resource_type, unsigned int *resource_id) +{ + int result; + u64 v1 = 0; + u64 v2 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("spursv", 0), + res_index, + 0, + &v1, &v2); + *resource_type = v1; + *resource_id = v2; + return result; +} + +static int ps3_repository_read_boot_dat_address(u64 *address) +{ + return read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("boot_dat", 0), + make_field("address", 0), + 0, + address, NULL); +} + +int ps3_repository_read_boot_dat_size(unsigned int *size) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("boot_dat", 0), + make_field("size", 0), + 0, + &v1, NULL); + *size = v1; + return result; +} + +int __init ps3_repository_read_vuart_av_port(unsigned int *port) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("vir_uart", 0), + make_field("port", 0), + make_field("avset", 0), + &v1, NULL); + *port = v1; + return result; +} + +int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_CURRENT, + make_first_field("bi", 0), + make_field("vir_uart", 0), + make_field("port", 0), + make_field("sysmgr", 0), + &v1, NULL); + *port = v1; + return result; +} + +/** + * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area. + * address: lpar address of cell_ext_os_area + * @size: size of cell_ext_os_area + */ + +int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size) +{ + int result; + + *size = 0; + result = ps3_repository_read_boot_dat_address(lpar_addr); + return result ? result + : ps3_repository_read_boot_dat_size(size); +} + +/** + * ps3_repository_read_num_be - Number of physical BE processors in the system. + */ + +int ps3_repository_read_num_be(unsigned int *num_be) +{ + int result; + u64 v1 = 0; + + result = read_node(PS3_LPAR_ID_PME, + make_first_field("ben", 0), + 0, + 0, + 0, + &v1, NULL); + *num_be = v1; + return result; +} + +/** + * ps3_repository_read_be_node_id - Read the physical BE processor node id. + * @be_index: Zero based index. + * @node_id: The BE processor node id. + */ + +int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("be", be_index), + 0, + 0, + 0, + node_id, NULL); +} + +/** + * ps3_repository_read_be_id - Read the physical BE processor id. + * @node_id: The BE processor node id. + * @be_id: The BE processor id. + */ + +int ps3_repository_read_be_id(u64 node_id, u64 *be_id) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("be", 0), + node_id, + 0, + 0, + be_id, NULL); +} + +int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) +{ + return read_node(PS3_LPAR_ID_PME, + make_first_field("be", 0), + node_id, + make_field("clock", 0), + 0, + tb_freq, NULL); +} + +int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) +{ + int result; + u64 node_id; + + *tb_freq = 0; + result = ps3_repository_read_be_node_id(be_index, &node_id); + return result ? result + : ps3_repository_read_tb_freq(node_id, tb_freq); +} + +int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar, + u64 *rights) +{ + int result; + u64 node_id; + + *lpar = 0; + *rights = 0; + result = ps3_repository_read_be_node_id(be_index, &node_id); + return result ? result + : read_node(PS3_LPAR_ID_PME, + make_first_field("be", 0), + node_id, + make_field("lpm", 0), + make_field("priv", 0), + lpar, rights); +} + +#if defined(CONFIG_PS3_REPOSITORY_WRITE) + +static int create_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2) +{ + int result; + + dump_node(0, n1, n2, n3, n4, v1, v2); + + result = lv1_create_repository_node(n1, n2, n3, n4, v1, v2); + + if (result) { + pr_devel("%s:%d: lv1_create_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return -ENOENT; + } + + return 0; +} + +static int delete_node(u64 n1, u64 n2, u64 n3, u64 n4) +{ + int result; + + dump_node(0, n1, n2, n3, n4, 0, 0); + + result = lv1_delete_repository_node(n1, n2, n3, n4); + + if (result) { + pr_devel("%s:%d: lv1_delete_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return -ENOENT; + } + + return 0; +} + +static int write_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2) +{ + int result; + + result = create_node(n1, n2, n3, n4, v1, v2); + + if (!result) + return 0; + + result = lv1_write_repository_node(n1, n2, n3, n4, v1, v2); + + if (result) { + pr_devel("%s:%d: lv1_write_repository_node failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return -ENOENT; + } + + return 0; +} + +int ps3_repository_write_highmem_region_count(unsigned int region_count) +{ + int result; + u64 v1 = (u64)region_count; + + result = write_node( + make_first_field("highmem", 0), + make_field("region", 0), + make_field("count", 0), + 0, + v1, 0); + return result; +} + +int ps3_repository_write_highmem_base(unsigned int region_index, + u64 highmem_base) +{ + return write_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("base", 0), + 0, + highmem_base, 0); +} + +int ps3_repository_write_highmem_size(unsigned int region_index, + u64 highmem_size) +{ + return write_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("size", 0), + 0, + highmem_size, 0); +} + +int ps3_repository_write_highmem_info(unsigned int region_index, + u64 highmem_base, u64 highmem_size) +{ + int result; + + result = ps3_repository_write_highmem_base(region_index, highmem_base); + return result ? result + : ps3_repository_write_highmem_size(region_index, highmem_size); +} + +static int ps3_repository_delete_highmem_base(unsigned int region_index) +{ + return delete_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("base", 0), + 0); +} + +static int ps3_repository_delete_highmem_size(unsigned int region_index) +{ + return delete_node( + make_first_field("highmem", 0), + make_field("region", region_index), + make_field("size", 0), + 0); +} + +int ps3_repository_delete_highmem_info(unsigned int region_index) +{ + int result; + + result = ps3_repository_delete_highmem_base(region_index); + result += ps3_repository_delete_highmem_size(region_index); + + return result ? -1 : 0; +} + +#endif /* defined(CONFIG_PS3_REPOSITORY_WRITE) */ + +#if defined(DEBUG) + +int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) +{ + int result = 0; + unsigned int res_index; + + pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + + for (res_index = 0; res_index < 10; res_index++) { + enum ps3_interrupt_type intr_type; + unsigned int interrupt_id; + + result = ps3_repository_read_dev_intr(repo->bus_index, + repo->dev_index, res_index, &intr_type, &interrupt_id); + + if (result) { + if (result != LV1_NO_ENTRY) + pr_devel("%s:%d ps3_repository_read_dev_intr" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + break; + } + + pr_devel("%s:%d (%u:%u) intr_type %u, interrupt_id %u\n", + __func__, __LINE__, repo->bus_index, repo->dev_index, + intr_type, interrupt_id); + } + + for (res_index = 0; res_index < 10; res_index++) { + enum ps3_reg_type reg_type; + u64 bus_addr; + u64 len; + + result = ps3_repository_read_dev_reg(repo->bus_index, + repo->dev_index, res_index, ®_type, &bus_addr, &len); + + if (result) { + if (result != LV1_NO_ENTRY) + pr_devel("%s:%d ps3_repository_read_dev_reg" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + break; + } + + pr_devel("%s:%d (%u:%u) reg_type %u, bus_addr %llxh, len %llxh\n", + __func__, __LINE__, repo->bus_index, repo->dev_index, + reg_type, bus_addr, len); + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +static int __init dump_stor_dev_info(struct ps3_repository_device *repo) +{ + int result = 0; + unsigned int num_regions, region_index; + u64 port, blk_size, num_blocks; + + pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + + result = ps3_repository_read_stor_dev_info(repo->bus_index, + repo->dev_index, &port, &blk_size, &num_blocks, &num_regions); + if (result) { + pr_devel("%s:%d ps3_repository_read_stor_dev_info" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + goto out; + } + + pr_devel("%s:%d (%u:%u): port %llu, blk_size %llu, num_blocks " + "%llu, num_regions %u\n", + __func__, __LINE__, repo->bus_index, repo->dev_index, + port, blk_size, num_blocks, num_regions); + + for (region_index = 0; region_index < num_regions; region_index++) { + unsigned int region_id; + u64 region_start, region_size; + + result = ps3_repository_read_stor_dev_region(repo->bus_index, + repo->dev_index, region_index, ®ion_id, + ®ion_start, ®ion_size); + if (result) { + pr_devel("%s:%d ps3_repository_read_stor_dev_region" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + break; + } + + pr_devel("%s:%d (%u:%u) region_id %u, start %lxh, size %lxh\n", + __func__, __LINE__, repo->bus_index, repo->dev_index, + region_id, (unsigned long)region_start, + (unsigned long)region_size); + } + +out: + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +static int __init dump_device_info(struct ps3_repository_device *repo, + unsigned int num_dev) +{ + int result = 0; + + pr_devel(" -> %s:%d: bus_%u\n", __func__, __LINE__, repo->bus_index); + + for (repo->dev_index = 0; repo->dev_index < num_dev; + repo->dev_index++) { + + result = ps3_repository_read_dev_type(repo->bus_index, + repo->dev_index, &repo->dev_type); + + if (result) { + pr_devel("%s:%d ps3_repository_read_dev_type" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + break; + } + + result = ps3_repository_read_dev_id(repo->bus_index, + repo->dev_index, &repo->dev_id); + + if (result) { + pr_devel("%s:%d ps3_repository_read_dev_id" + " (%u:%u) failed\n", __func__, __LINE__, + repo->bus_index, repo->dev_index); + continue; + } + + pr_devel("%s:%d (%u:%u): dev_type %u, dev_id %lu\n", __func__, + __LINE__, repo->bus_index, repo->dev_index, + repo->dev_type, (unsigned long)repo->dev_id); + + ps3_repository_dump_resource_info(repo); + + if (repo->bus_type == PS3_BUS_TYPE_STORAGE) + dump_stor_dev_info(repo); + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +int __init ps3_repository_dump_bus_info(void) +{ + int result = 0; + struct ps3_repository_device repo; + + pr_devel(" -> %s:%d\n", __func__, __LINE__); + + memset(&repo, 0, sizeof(repo)); + + for (repo.bus_index = 0; repo.bus_index < 10; repo.bus_index++) { + unsigned int num_dev; + + result = ps3_repository_read_bus_type(repo.bus_index, + &repo.bus_type); + + if (result) { + pr_devel("%s:%d read_bus_type(%u) failed\n", + __func__, __LINE__, repo.bus_index); + break; + } + + result = ps3_repository_read_bus_id(repo.bus_index, + &repo.bus_id); + + if (result) { + pr_devel("%s:%d read_bus_id(%u) failed\n", + __func__, __LINE__, repo.bus_index); + continue; + } + + if (repo.bus_index != repo.bus_id) + pr_devel("%s:%d bus_index != bus_id\n", + __func__, __LINE__); + + result = ps3_repository_read_bus_num_dev(repo.bus_index, + &num_dev); + + if (result) { + pr_devel("%s:%d read_bus_num_dev(%u) failed\n", + __func__, __LINE__, repo.bus_index); + continue; + } + + pr_devel("%s:%d bus_%u: bus_type %u, bus_id %lu, num_dev %u\n", + __func__, __LINE__, repo.bus_index, repo.bus_type, + (unsigned long)repo.bus_id, num_dev); + + dump_device_info(&repo, num_dev); + } + + pr_devel(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +#endif /* defined(DEBUG) */ diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c new file mode 100644 index 0000000000..5144f11359 --- /dev/null +++ b/arch/powerpc/platforms/ps3/setup.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 platform setup routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#else +#define DBG pr_debug +#endif + +/* mutex synchronizing GPU accesses and video mode changes */ +DEFINE_MUTEX(ps3_gpu_mutex); +EXPORT_SYMBOL_GPL(ps3_gpu_mutex); + +static union ps3_firmware_version ps3_firmware_version; +static char ps3_firmware_version_str[16]; + +void ps3_get_firmware_version(union ps3_firmware_version *v) +{ + *v = ps3_firmware_version; +} +EXPORT_SYMBOL_GPL(ps3_get_firmware_version); + +int ps3_compare_firmware_version(u16 major, u16 minor, u16 rev) +{ + union ps3_firmware_version x; + + x.pad = 0; + x.major = major; + x.minor = minor; + x.rev = rev; + + return (ps3_firmware_version.raw > x.raw) - + (ps3_firmware_version.raw < x.raw); +} +EXPORT_SYMBOL_GPL(ps3_compare_firmware_version); + +static void ps3_power_save(void) +{ + /* + * lv1_pause() puts the PPE thread into inactive state until an + * irq on an unmasked plug exists. MSR[EE] has no effect. + * flags: 0 = wake on DEC interrupt, 1 = ignore DEC interrupt. + */ + + lv1_pause(0); +} + +static void __noreturn ps3_restart(char *cmd) +{ + DBG("%s:%d cmd '%s'\n", __func__, __LINE__, cmd); + + smp_send_stop(); + ps3_sys_manager_restart(); /* never returns */ +} + +static void ps3_power_off(void) +{ + DBG("%s:%d\n", __func__, __LINE__); + + smp_send_stop(); + ps3_sys_manager_power_off(); /* never returns */ +} + +static void __noreturn ps3_halt(void) +{ + DBG("%s:%d\n", __func__, __LINE__); + + smp_send_stop(); + ps3_sys_manager_halt(); /* never returns */ +} + +static void ps3_panic(char *str) +{ + DBG("%s:%d %s\n", __func__, __LINE__, str); + + smp_send_stop(); + printk("\n"); + printk(" System does not reboot automatically.\n"); + printk(" Please press POWER button.\n"); + printk("\n"); + panic_flush_kmsg_end(); + + while(1) + lv1_pause(1); +} + +#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \ + defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) +static void __init prealloc(struct ps3_prealloc *p) +{ + if (!p->size) + return; + + p->address = memblock_alloc(p->size, p->align); + if (!p->address) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, p->size, p->align); + + printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, + p->address); +} +#endif + +#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) +struct ps3_prealloc ps3fb_videomemory = { + .name = "ps3fb videomemory", + .size = CONFIG_FB_PS3_DEFAULT_SIZE_M*1024*1024, + .align = 1024*1024 /* the GPU requires 1 MiB alignment */ +}; +EXPORT_SYMBOL_GPL(ps3fb_videomemory); +#define prealloc_ps3fb_videomemory() prealloc(&ps3fb_videomemory) + +static int __init early_parse_ps3fb(char *p) +{ + if (!p) + return 1; + + ps3fb_videomemory.size = ALIGN(memparse(p, &p), + ps3fb_videomemory.align); + return 0; +} +early_param("ps3fb", early_parse_ps3fb); +#else +#define prealloc_ps3fb_videomemory() do { } while (0) +#endif + +#if defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) +struct ps3_prealloc ps3flash_bounce_buffer = { + .name = "ps3flash bounce buffer", + .size = 256*1024, + .align = 256*1024 +}; +EXPORT_SYMBOL_GPL(ps3flash_bounce_buffer); +#define prealloc_ps3flash_bounce_buffer() prealloc(&ps3flash_bounce_buffer) + +static int __init early_parse_ps3flash(char *p) +{ + if (!p) + return 1; + + if (!strcmp(p, "off")) + ps3flash_bounce_buffer.size = 0; + + return 0; +} +early_param("ps3flash", early_parse_ps3flash); +#else +#define prealloc_ps3flash_bounce_buffer() do { } while (0) +#endif + +static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) +{ + /* Have to set at least one bit in the DABRX */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* hypervisor only allows us to set BTI, Kernel and user */ + dabrx &= DABRX_BTI | DABRX_KERNEL | DABRX_USER; + + return lv1_set_dabr(dabr, dabrx) ? -1 : 0; +} + +static ssize_t ps3_fw_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s", ps3_firmware_version_str); +} + +static int __init ps3_setup_sysfs(void) +{ + static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO, + ps3_fw_version_show, NULL); + static struct kobject *kobj; + int result; + + kobj = kobject_create_and_add("ps3", firmware_kobj); + + if (!kobj) { + pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__, + __LINE__); + return -ENOMEM; + } + + result = sysfs_create_file(kobj, &attr.attr); + + if (result) { + pr_warn("%s:%d: sysfs_create_file failed.\n", __func__, + __LINE__); + kobject_put(kobj); + return -ENOMEM; + } + + return 0; +} +core_initcall(ps3_setup_sysfs); + +static void __init ps3_setup_arch(void) +{ + u64 tmp; + + DBG(" -> %s:%d\n", __func__, __LINE__); + + lv1_get_version_info(&ps3_firmware_version.raw, &tmp); + + snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str), + "%u.%u.%u", ps3_firmware_version.major, + ps3_firmware_version.minor, ps3_firmware_version.rev); + + printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str); + + ps3_spu_set_platform(); + +#ifdef CONFIG_SMP + smp_init_ps3(); +#endif + + prealloc_ps3fb_videomemory(); + prealloc_ps3flash_bounce_buffer(); + + ppc_md.power_save = ps3_power_save; + ps3_os_area_init(); + + DBG(" <- %s:%d\n", __func__, __LINE__); +} + +static void __init ps3_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +void __init ps3_early_mm_init(void) +{ + unsigned long htab_size; + + ps3_mm_init(); + ps3_mm_vas_create(&htab_size); + ps3_hpte_init(htab_size); +} + +static int __init ps3_probe(void) +{ + DBG(" -> %s:%d\n", __func__, __LINE__); + + ps3_os_area_save_params(); + + pm_power_off = ps3_power_off; + + DBG(" <- %s:%d\n", __func__, __LINE__); + return 1; +} + +#if defined(CONFIG_KEXEC_CORE) +static void ps3_kexec_cpu_down(int crash_shutdown, int secondary) +{ + int cpu = smp_processor_id(); + + DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu); + + ps3_smp_cleanup_cpu(cpu); + ps3_shutdown_IRQ(cpu); + + DBG(" <- %s:%d\n", __func__, __LINE__); +} +#endif + +define_machine(ps3) { + .name = "PS3", + .compatible = "sony,ps3", + .probe = ps3_probe, + .setup_arch = ps3_setup_arch, + .init_IRQ = ps3_init_IRQ, + .panic = ps3_panic, + .get_boot_time = ps3_get_boot_time, + .set_dabr = ps3_set_dabr, + .calibrate_decr = ps3_calibrate_decr, + .progress = ps3_progress, + .restart = ps3_restart, + .halt = ps3_halt, +#if defined(CONFIG_KEXEC_CORE) + .kexec_cpu_down = ps3_kexec_cpu_down, +#endif +}; diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c new file mode 100644 index 0000000000..8529575600 --- /dev/null +++ b/arch/powerpc/platforms/ps3/smp.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 SMP routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include + +#include +#include + +#include "platform.h" + +#if defined(DEBUG) +#define DBG udbg_printf +#else +#define DBG pr_debug +#endif + +/** + * ps3_ipi_virqs - a per cpu array of virqs for ipi use + */ + +#define MSG_COUNT 4 +static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs); + +static void ps3_smp_message_pass(int cpu, int msg) +{ + int result; + unsigned int virq; + + if (msg >= MSG_COUNT) { + DBG("%s:%d: bad msg: %d\n", __func__, __LINE__, msg); + return; + } + + virq = per_cpu(ps3_ipi_virqs, cpu)[msg]; + result = ps3_send_event_locally(virq); + + if (result) + DBG("%s:%d: ps3_send_event_locally(%d, %d) failed" + " (%d)\n", __func__, __LINE__, cpu, msg, result); +} + +static void __init ps3_smp_probe(void) +{ + int cpu; + + for (cpu = 0; cpu < 2; cpu++) { + int result; + unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu); + int i; + + DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu); + + /* + * Check assumptions on ps3_ipi_virqs[] indexing. If this + * check fails, then a different mapping of PPC_MSG_ + * to index needs to be setup. + */ + + BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0); + BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1); + BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2); + BUILD_BUG_ON(PPC_MSG_NMI_IPI != 3); + + for (i = 0; i < MSG_COUNT; i++) { + result = ps3_event_receive_port_setup(cpu, &virqs[i]); + + if (result) + continue; + + DBG("%s:%d: (%d, %d) => virq %u\n", + __func__, __LINE__, cpu, i, virqs[i]); + + result = smp_request_message_ipi(virqs[i], i); + + if (result) + virqs[i] = 0; + else + ps3_register_ipi_irq(cpu, virqs[i]); + } + + ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_NMI_IPI]); + + DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); + } +} + +void ps3_smp_cleanup_cpu(int cpu) +{ + unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu); + int i; + + DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu); + + for (i = 0; i < MSG_COUNT; i++) { + /* Can't call free_irq from interrupt context. */ + ps3_event_receive_port_destroy(virqs[i]); + virqs[i] = 0; + } + + DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); +} + +static struct smp_ops_t ps3_smp_ops = { + .probe = ps3_smp_probe, + .message_pass = ps3_smp_message_pass, + .kick_cpu = smp_generic_kick_cpu, +}; + +void __init smp_init_ps3(void) +{ + DBG(" -> %s\n", __func__); + smp_ops = &ps3_smp_ops; + DBG(" <- %s\n", __func__); +} diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c new file mode 100644 index 0000000000..4a2520ec6d --- /dev/null +++ b/arch/powerpc/platforms/ps3/spu.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 Platform spu routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "../cell/spufs/spufs.h" +#include "platform.h" + +/* spu_management_ops */ + +/** + * enum spe_type - Type of spe to create. + * @spe_type_logical: Standard logical spe. + * + * For use with lv1_construct_logical_spe(). The current HV does not support + * any types other than those listed. + */ + +enum spe_type { + SPE_TYPE_LOGICAL = 0, +}; + +/** + * struct spe_shadow - logical spe shadow register area. + * + * Read-only shadow of spe registers. + */ + +struct spe_shadow { + u8 padding_0140[0x0140]; + u64 int_status_class0_RW; /* 0x0140 */ + u64 int_status_class1_RW; /* 0x0148 */ + u64 int_status_class2_RW; /* 0x0150 */ + u8 padding_0158[0x0610-0x0158]; + u64 mfc_dsisr_RW; /* 0x0610 */ + u8 padding_0618[0x0620-0x0618]; + u64 mfc_dar_RW; /* 0x0620 */ + u8 padding_0628[0x0800-0x0628]; + u64 mfc_dsipr_R; /* 0x0800 */ + u8 padding_0808[0x0810-0x0808]; + u64 mfc_lscrr_R; /* 0x0810 */ + u8 padding_0818[0x0c00-0x0818]; + u64 mfc_cer_R; /* 0x0c00 */ + u8 padding_0c08[0x0f00-0x0c08]; + u64 spe_execution_status; /* 0x0f00 */ + u8 padding_0f08[0x1000-0x0f08]; +}; + +/** + * enum spe_ex_state - Logical spe execution state. + * @spe_ex_state_unexecutable: Uninitialized. + * @spe_ex_state_executable: Enabled, not ready. + * @spe_ex_state_executed: Ready for use. + * + * The execution state (status) of the logical spe as reported in + * struct spe_shadow:spe_execution_status. + */ + +enum spe_ex_state { + SPE_EX_STATE_UNEXECUTABLE = 0, + SPE_EX_STATE_EXECUTABLE = 2, + SPE_EX_STATE_EXECUTED = 3, +}; + +/** + * struct priv1_cache - Cached values of priv1 registers. + * @masks[]: Array of cached spe interrupt masks, indexed by class. + * @sr1: Cached mfc_sr1 register. + * @tclass_id: Cached mfc_tclass_id register. + */ + +struct priv1_cache { + u64 masks[3]; + u64 sr1; + u64 tclass_id; +}; + +/** + * struct spu_pdata - Platform state variables. + * @spe_id: HV spe id returned by lv1_construct_logical_spe(). + * @resource_id: HV spe resource id returned by + * ps3_repository_read_spe_resource_id(). + * @priv2_addr: lpar address of spe priv2 area returned by + * lv1_construct_logical_spe(). + * @shadow_addr: lpar address of spe register shadow area returned by + * lv1_construct_logical_spe(). + * @shadow: Virtual (ioremap) address of spe register shadow area. + * @cache: Cached values of priv1 registers. + */ + +struct spu_pdata { + u64 spe_id; + u64 resource_id; + u64 priv2_addr; + u64 shadow_addr; + struct spe_shadow __iomem *shadow; + struct priv1_cache cache; +}; + +static struct spu_pdata *spu_pdata(struct spu *spu) +{ + return spu->pdata; +} + +#define dump_areas(_a, _b, _c, _d, _e) \ + _dump_areas(_a, _b, _c, _d, _e, __func__, __LINE__) +static void _dump_areas(unsigned int spe_id, unsigned long priv2, + unsigned long problem, unsigned long ls, unsigned long shadow, + const char* func, int line) +{ + pr_debug("%s:%d: spe_id: %xh (%u)\n", func, line, spe_id, spe_id); + pr_debug("%s:%d: priv2: %lxh\n", func, line, priv2); + pr_debug("%s:%d: problem: %lxh\n", func, line, problem); + pr_debug("%s:%d: ls: %lxh\n", func, line, ls); + pr_debug("%s:%d: shadow: %lxh\n", func, line, shadow); +} + +u64 ps3_get_spe_id(void *arg) +{ + return spu_pdata(arg)->spe_id; +} +EXPORT_SYMBOL_GPL(ps3_get_spe_id); + +static unsigned long __init get_vas_id(void) +{ + u64 id; + + lv1_get_logical_ppe_id(&id); + lv1_get_virtual_address_space_id_of_ppe(&id); + + return id; +} + +static int __init construct_spu(struct spu *spu) +{ + int result; + u64 unused; + u64 problem_phys; + u64 local_store_phys; + + result = lv1_construct_logical_spe(PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT, + PAGE_SHIFT, PAGE_SHIFT, get_vas_id(), SPE_TYPE_LOGICAL, + &spu_pdata(spu)->priv2_addr, &problem_phys, + &local_store_phys, &unused, + &spu_pdata(spu)->shadow_addr, + &spu_pdata(spu)->spe_id); + spu->problem_phys = problem_phys; + spu->local_store_phys = local_store_phys; + + if (result) { + pr_debug("%s:%d: lv1_construct_logical_spe failed: %s\n", + __func__, __LINE__, ps3_result(result)); + return result; + } + + return result; +} + +static void spu_unmap(struct spu *spu) +{ + iounmap(spu->priv2); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); + iounmap(spu_pdata(spu)->shadow); +} + +/** + * setup_areas - Map the spu regions into the address space. + * + * The current HV requires the spu shadow regs to be mapped with the + * PTE page protection bits set as read-only. + */ + +static int __init setup_areas(struct spu *spu) +{ + struct table {char* name; unsigned long addr; unsigned long size;}; + unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO)); + + spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr, + sizeof(struct spe_shadow), shadow_flags); + if (!spu_pdata(spu)->shadow) { + pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__); + goto fail_ioremap; + } + + spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE); + + if (!spu->local_store) { + pr_debug("%s:%d: ioremap local_store failed\n", + __func__, __LINE__); + goto fail_ioremap; + } + + spu->problem = ioremap(spu->problem_phys, + sizeof(struct spu_problem)); + + if (!spu->problem) { + pr_debug("%s:%d: ioremap problem failed\n", __func__, __LINE__); + goto fail_ioremap; + } + + spu->priv2 = ioremap(spu_pdata(spu)->priv2_addr, + sizeof(struct spu_priv2)); + + if (!spu->priv2) { + pr_debug("%s:%d: ioremap priv2 failed\n", __func__, __LINE__); + goto fail_ioremap; + } + + dump_areas(spu_pdata(spu)->spe_id, spu_pdata(spu)->priv2_addr, + spu->problem_phys, spu->local_store_phys, + spu_pdata(spu)->shadow_addr); + dump_areas(spu_pdata(spu)->spe_id, (unsigned long)spu->priv2, + (unsigned long)spu->problem, (unsigned long)spu->local_store, + (unsigned long)spu_pdata(spu)->shadow); + + return 0; + +fail_ioremap: + spu_unmap(spu); + + return -ENOMEM; +} + +static int __init setup_interrupts(struct spu *spu) +{ + int result; + + result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY, spu_pdata(spu)->spe_id, + 0, &spu->irqs[0]); + + if (result) + goto fail_alloc_0; + + result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY, spu_pdata(spu)->spe_id, + 1, &spu->irqs[1]); + + if (result) + goto fail_alloc_1; + + result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY, spu_pdata(spu)->spe_id, + 2, &spu->irqs[2]); + + if (result) + goto fail_alloc_2; + + return result; + +fail_alloc_2: + ps3_spe_irq_destroy(spu->irqs[1]); +fail_alloc_1: + ps3_spe_irq_destroy(spu->irqs[0]); +fail_alloc_0: + spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0; + return result; +} + +static int __init enable_spu(struct spu *spu) +{ + int result; + + result = lv1_enable_logical_spe(spu_pdata(spu)->spe_id, + spu_pdata(spu)->resource_id); + + if (result) { + pr_debug("%s:%d: lv1_enable_logical_spe failed: %s\n", + __func__, __LINE__, ps3_result(result)); + goto fail_enable; + } + + result = setup_areas(spu); + + if (result) + goto fail_areas; + + result = setup_interrupts(spu); + + if (result) + goto fail_interrupts; + + return 0; + +fail_interrupts: + spu_unmap(spu); +fail_areas: + lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0); +fail_enable: + return result; +} + +static int ps3_destroy_spu(struct spu *spu) +{ + int result; + + pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number); + + result = lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0); + BUG_ON(result); + + ps3_spe_irq_destroy(spu->irqs[2]); + ps3_spe_irq_destroy(spu->irqs[1]); + ps3_spe_irq_destroy(spu->irqs[0]); + + spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0; + + spu_unmap(spu); + + result = lv1_destruct_logical_spe(spu_pdata(spu)->spe_id); + BUG_ON(result); + + kfree(spu->pdata); + spu->pdata = NULL; + + return 0; +} + +static int __init ps3_create_spu(struct spu *spu, void *data) +{ + int result; + + pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number); + + spu->pdata = kzalloc(sizeof(struct spu_pdata), + GFP_KERNEL); + + if (!spu->pdata) { + result = -ENOMEM; + goto fail_malloc; + } + + spu_pdata(spu)->resource_id = (unsigned long)data; + + /* Init cached reg values to HV defaults. */ + + spu_pdata(spu)->cache.sr1 = 0x33; + + result = construct_spu(spu); + + if (result) + goto fail_construct; + + /* For now, just go ahead and enable it. */ + + result = enable_spu(spu); + + if (result) + goto fail_enable; + + /* Make sure the spu is in SPE_EX_STATE_EXECUTED. */ + + /* need something better here!!! */ + while (in_be64(&spu_pdata(spu)->shadow->spe_execution_status) + != SPE_EX_STATE_EXECUTED) + (void)0; + + return result; + +fail_enable: +fail_construct: + ps3_destroy_spu(spu); +fail_malloc: + return result; +} + +static int __init ps3_enumerate_spus(int (*fn)(void *data)) +{ + int result; + unsigned int num_resource_id; + unsigned int i; + + result = ps3_repository_read_num_spu_resource_id(&num_resource_id); + + pr_debug("%s:%d: num_resource_id %u\n", __func__, __LINE__, + num_resource_id); + + /* + * For now, just create logical spus equal to the number + * of physical spus reserved for the partition. + */ + + for (i = 0; i < num_resource_id; i++) { + enum ps3_spu_resource_type resource_type; + unsigned int resource_id; + + result = ps3_repository_read_spu_resource_id(i, + &resource_type, &resource_id); + + if (result) + break; + + if (resource_type == PS3_SPU_RESOURCE_TYPE_EXCLUSIVE) { + result = fn((void*)(unsigned long)resource_id); + + if (result) + break; + } + } + + if (result) { + printk(KERN_WARNING "%s:%d: Error initializing spus\n", + __func__, __LINE__); + return result; + } + + return num_resource_id; +} + +static int ps3_init_affinity(void) +{ + return 0; +} + +/** + * ps3_enable_spu - Enable SPU run control. + * + * An outstanding enhancement for the PS3 would be to add a guard to check + * for incorrect access to the spu problem state when the spu context is + * disabled. This check could be implemented with a flag added to the spu + * context that would inhibit mapping problem state pages, and a routine + * to unmap spu problem state pages. When the spu is enabled with + * ps3_enable_spu() the flag would be set allowing pages to be mapped, + * and when the spu is disabled with ps3_disable_spu() the flag would be + * cleared and the mapped problem state pages would be unmapped. + */ + +static void ps3_enable_spu(struct spu_context *ctx) +{ +} + +static void ps3_disable_spu(struct spu_context *ctx) +{ + ctx->ops->runcntl_stop(ctx); +} + +static const struct spu_management_ops spu_management_ps3_ops = { + .enumerate_spus = ps3_enumerate_spus, + .create_spu = ps3_create_spu, + .destroy_spu = ps3_destroy_spu, + .enable_spu = ps3_enable_spu, + .disable_spu = ps3_disable_spu, + .init_affinity = ps3_init_affinity, +}; + +/* spu_priv1_ops */ + +static void int_mask_and(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + /* are these serialized by caller??? */ + old_mask = spu_int_mask_get(spu, class); + spu_int_mask_set(spu, class, old_mask & mask); +} + +static void int_mask_or(struct spu *spu, int class, u64 mask) +{ + u64 old_mask; + + old_mask = spu_int_mask_get(spu, class); + spu_int_mask_set(spu, class, old_mask | mask); +} + +static void int_mask_set(struct spu *spu, int class, u64 mask) +{ + spu_pdata(spu)->cache.masks[class] = mask; + lv1_set_spe_interrupt_mask(spu_pdata(spu)->spe_id, class, + spu_pdata(spu)->cache.masks[class]); +} + +static u64 int_mask_get(struct spu *spu, int class) +{ + return spu_pdata(spu)->cache.masks[class]; +} + +static void int_stat_clear(struct spu *spu, int class, u64 stat) +{ + /* Note that MFC_DSISR will be cleared when class1[MF] is set. */ + + lv1_clear_spe_interrupt_status(spu_pdata(spu)->spe_id, class, + stat, 0); +} + +static u64 int_stat_get(struct spu *spu, int class) +{ + u64 stat; + + lv1_get_spe_interrupt_status(spu_pdata(spu)->spe_id, class, &stat); + return stat; +} + +static void cpu_affinity_set(struct spu *spu, int cpu) +{ + /* No support. */ +} + +static u64 mfc_dar_get(struct spu *spu) +{ + return in_be64(&spu_pdata(spu)->shadow->mfc_dar_RW); +} + +static void mfc_dsisr_set(struct spu *spu, u64 dsisr) +{ + /* Nothing to do, cleared in int_stat_clear(). */ +} + +static u64 mfc_dsisr_get(struct spu *spu) +{ + return in_be64(&spu_pdata(spu)->shadow->mfc_dsisr_RW); +} + +static void mfc_sdr_setup(struct spu *spu) +{ + /* Nothing to do. */ +} + +static void mfc_sr1_set(struct spu *spu, u64 sr1) +{ + /* Check bits allowed by HV. */ + + static const u64 allowed = ~(MFC_STATE1_LOCAL_STORAGE_DECODE_MASK + | MFC_STATE1_PROBLEM_STATE_MASK); + + BUG_ON((sr1 & allowed) != (spu_pdata(spu)->cache.sr1 & allowed)); + + spu_pdata(spu)->cache.sr1 = sr1; + lv1_set_spe_privilege_state_area_1_register( + spu_pdata(spu)->spe_id, + offsetof(struct spu_priv1, mfc_sr1_RW), + spu_pdata(spu)->cache.sr1); +} + +static u64 mfc_sr1_get(struct spu *spu) +{ + return spu_pdata(spu)->cache.sr1; +} + +static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) +{ + spu_pdata(spu)->cache.tclass_id = tclass_id; + lv1_set_spe_privilege_state_area_1_register( + spu_pdata(spu)->spe_id, + offsetof(struct spu_priv1, mfc_tclass_id_RW), + spu_pdata(spu)->cache.tclass_id); +} + +static u64 mfc_tclass_id_get(struct spu *spu) +{ + return spu_pdata(spu)->cache.tclass_id; +} + +static void tlb_invalidate(struct spu *spu) +{ + /* Nothing to do. */ +} + +static void resource_allocation_groupID_set(struct spu *spu, u64 id) +{ + /* No support. */ +} + +static u64 resource_allocation_groupID_get(struct spu *spu) +{ + return 0; /* No support. */ +} + +static void resource_allocation_enable_set(struct spu *spu, u64 enable) +{ + /* No support. */ +} + +static u64 resource_allocation_enable_get(struct spu *spu) +{ + return 0; /* No support. */ +} + +static const struct spu_priv1_ops spu_priv1_ps3_ops = { + .int_mask_and = int_mask_and, + .int_mask_or = int_mask_or, + .int_mask_set = int_mask_set, + .int_mask_get = int_mask_get, + .int_stat_clear = int_stat_clear, + .int_stat_get = int_stat_get, + .cpu_affinity_set = cpu_affinity_set, + .mfc_dar_get = mfc_dar_get, + .mfc_dsisr_set = mfc_dsisr_set, + .mfc_dsisr_get = mfc_dsisr_get, + .mfc_sdr_setup = mfc_sdr_setup, + .mfc_sr1_set = mfc_sr1_set, + .mfc_sr1_get = mfc_sr1_get, + .mfc_tclass_id_set = mfc_tclass_id_set, + .mfc_tclass_id_get = mfc_tclass_id_get, + .tlb_invalidate = tlb_invalidate, + .resource_allocation_groupID_set = resource_allocation_groupID_set, + .resource_allocation_groupID_get = resource_allocation_groupID_get, + .resource_allocation_enable_set = resource_allocation_enable_set, + .resource_allocation_enable_get = resource_allocation_enable_get, +}; + +void ps3_spu_set_platform(void) +{ + spu_priv1_ops = &spu_priv1_ps3_ops; + spu_management_ops = &spu_management_ps3_ops; +} diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c new file mode 100644 index 0000000000..d6b5f5ecd5 --- /dev/null +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -0,0 +1,803 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 system bus driver. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "platform.h" + +static struct device ps3_system_bus = { + .init_name = "ps3_system", +}; + +/* FIXME: need device usage counters! */ +static struct { + struct mutex mutex; + int sb_11; /* usb 0 */ + int sb_12; /* usb 0 */ + int gpu; +} usage_hack; + +static int ps3_is_device(struct ps3_system_bus_device *dev, u64 bus_id, + u64 dev_id) +{ + return dev->bus_id == bus_id && dev->dev_id == dev_id; +} + +static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev) +{ + int result; + + BUG_ON(!dev->bus_id); + mutex_lock(&usage_hack.mutex); + + if (ps3_is_device(dev, 1, 1)) { + usage_hack.sb_11++; + if (usage_hack.sb_11 > 1) { + result = 0; + goto done; + } + } + + if (ps3_is_device(dev, 1, 2)) { + usage_hack.sb_12++; + if (usage_hack.sb_12 > 1) { + result = 0; + goto done; + } + } + + result = lv1_open_device(dev->bus_id, dev->dev_id, 0); + + if (result) { + pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n", + __func__, __LINE__, dev->match_id, dev->match_sub_id, + dev_name(&dev->core), ps3_result(result)); + result = -EPERM; + } + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +static int ps3_close_hv_device_sb(struct ps3_system_bus_device *dev) +{ + int result; + + BUG_ON(!dev->bus_id); + mutex_lock(&usage_hack.mutex); + + if (ps3_is_device(dev, 1, 1)) { + usage_hack.sb_11--; + if (usage_hack.sb_11) { + result = 0; + goto done; + } + } + + if (ps3_is_device(dev, 1, 2)) { + usage_hack.sb_12--; + if (usage_hack.sb_12) { + result = 0; + goto done; + } + } + + result = lv1_close_device(dev->bus_id, dev->dev_id); + BUG_ON(result); + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev) +{ + int result; + + mutex_lock(&usage_hack.mutex); + + usage_hack.gpu++; + if (usage_hack.gpu > 1) { + result = 0; + goto done; + } + + result = lv1_gpu_open(0); + + if (result) { + pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__, + __LINE__, ps3_result(result)); + result = -EPERM; + } + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +static int ps3_close_hv_device_gpu(struct ps3_system_bus_device *dev) +{ + int result; + + mutex_lock(&usage_hack.mutex); + + usage_hack.gpu--; + if (usage_hack.gpu) { + result = 0; + goto done; + } + + result = lv1_gpu_close(); + BUG_ON(result); + +done: + mutex_unlock(&usage_hack.mutex); + return result; +} + +int ps3_open_hv_device(struct ps3_system_bus_device *dev) +{ + BUG_ON(!dev); + pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id); + + switch (dev->match_id) { + case PS3_MATCH_ID_EHCI: + case PS3_MATCH_ID_OHCI: + case PS3_MATCH_ID_GELIC: + case PS3_MATCH_ID_STOR_DISK: + case PS3_MATCH_ID_STOR_ROM: + case PS3_MATCH_ID_STOR_FLASH: + return ps3_open_hv_device_sb(dev); + + case PS3_MATCH_ID_SOUND: + case PS3_MATCH_ID_GPU: + return ps3_open_hv_device_gpu(dev); + + case PS3_MATCH_ID_AV_SETTINGS: + case PS3_MATCH_ID_SYSTEM_MANAGER: + pr_debug("%s:%d: unsupported match_id: %u\n", __func__, + __LINE__, dev->match_id); + pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__, + dev->bus_id); + BUG(); + return -EINVAL; + + default: + break; + } + + pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__, + dev->match_id); + BUG(); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(ps3_open_hv_device); + +int ps3_close_hv_device(struct ps3_system_bus_device *dev) +{ + BUG_ON(!dev); + pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id); + + switch (dev->match_id) { + case PS3_MATCH_ID_EHCI: + case PS3_MATCH_ID_OHCI: + case PS3_MATCH_ID_GELIC: + case PS3_MATCH_ID_STOR_DISK: + case PS3_MATCH_ID_STOR_ROM: + case PS3_MATCH_ID_STOR_FLASH: + return ps3_close_hv_device_sb(dev); + + case PS3_MATCH_ID_SOUND: + case PS3_MATCH_ID_GPU: + return ps3_close_hv_device_gpu(dev); + + case PS3_MATCH_ID_AV_SETTINGS: + case PS3_MATCH_ID_SYSTEM_MANAGER: + pr_debug("%s:%d: unsupported match_id: %u\n", __func__, + __LINE__, dev->match_id); + pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__, + dev->bus_id); + BUG(); + return -EINVAL; + + default: + break; + } + + pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__, + dev->match_id); + BUG(); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(ps3_close_hv_device); + +#define dump_mmio_region(_a) _dump_mmio_region(_a, __func__, __LINE__) +static void _dump_mmio_region(const struct ps3_mmio_region* r, + const char* func, int line) +{ + pr_debug("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id, + r->dev->dev_id); + pr_debug("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr); + pr_debug("%s:%d: len %lxh\n", func, line, r->len); + pr_debug("%s:%d: lpar_addr %lxh\n", func, line, r->lpar_addr); +} + +static int ps3_sb_mmio_region_create(struct ps3_mmio_region *r) +{ + int result; + u64 lpar_addr; + + result = lv1_map_device_mmio_region(r->dev->bus_id, r->dev->dev_id, + r->bus_addr, r->len, r->page_size, &lpar_addr); + r->lpar_addr = lpar_addr; + + if (result) { + pr_debug("%s:%d: lv1_map_device_mmio_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + r->lpar_addr = 0; + } + + dump_mmio_region(r); + return result; +} + +static int ps3_ioc0_mmio_region_create(struct ps3_mmio_region *r) +{ + /* device specific; do nothing currently */ + return 0; +} + +int ps3_mmio_region_create(struct ps3_mmio_region *r) +{ + return r->mmio_ops->create(r); +} +EXPORT_SYMBOL_GPL(ps3_mmio_region_create); + +static int ps3_sb_free_mmio_region(struct ps3_mmio_region *r) +{ + int result; + + dump_mmio_region(r); + result = lv1_unmap_device_mmio_region(r->dev->bus_id, r->dev->dev_id, + r->lpar_addr); + + if (result) + pr_debug("%s:%d: lv1_unmap_device_mmio_region failed: %s\n", + __func__, __LINE__, ps3_result(result)); + + r->lpar_addr = 0; + return result; +} + +static int ps3_ioc0_free_mmio_region(struct ps3_mmio_region *r) +{ + /* device specific; do nothing currently */ + return 0; +} + + +int ps3_free_mmio_region(struct ps3_mmio_region *r) +{ + return r->mmio_ops->free(r); +} + +EXPORT_SYMBOL_GPL(ps3_free_mmio_region); + +static const struct ps3_mmio_region_ops ps3_mmio_sb_region_ops = { + .create = ps3_sb_mmio_region_create, + .free = ps3_sb_free_mmio_region +}; + +static const struct ps3_mmio_region_ops ps3_mmio_ioc0_region_ops = { + .create = ps3_ioc0_mmio_region_create, + .free = ps3_ioc0_free_mmio_region +}; + +int ps3_mmio_region_init(struct ps3_system_bus_device *dev, + struct ps3_mmio_region *r, unsigned long bus_addr, unsigned long len, + enum ps3_mmio_page_size page_size) +{ + r->dev = dev; + r->bus_addr = bus_addr; + r->len = len; + r->page_size = page_size; + switch (dev->dev_type) { + case PS3_DEVICE_TYPE_SB: + r->mmio_ops = &ps3_mmio_sb_region_ops; + break; + case PS3_DEVICE_TYPE_IOC0: + r->mmio_ops = &ps3_mmio_ioc0_region_ops; + break; + default: + BUG(); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(ps3_mmio_region_init); + +static int ps3_system_bus_match(struct device *_dev, + struct device_driver *_drv) +{ + int result; + struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv); + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + + if (!dev->match_sub_id) + result = dev->match_id == drv->match_id; + else + result = dev->match_sub_id == drv->match_sub_id && + dev->match_id == drv->match_id; + + if (result) + pr_info("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): match\n", + __func__, __LINE__, + dev->match_id, dev->match_sub_id, dev_name(&dev->core), + drv->match_id, drv->match_sub_id, drv->core.name); + else + pr_debug("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): miss\n", + __func__, __LINE__, + dev->match_id, dev->match_sub_id, dev_name(&dev->core), + drv->match_id, drv->match_sub_id, drv->core.name); + + return result; +} + +static int ps3_system_bus_probe(struct device *_dev) +{ + int result = 0; + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct ps3_system_bus_driver *drv; + + BUG_ON(!dev); + dev_dbg(_dev, "%s:%d\n", __func__, __LINE__); + + drv = ps3_system_bus_dev_to_system_bus_drv(dev); + BUG_ON(!drv); + + if (drv->probe) + result = drv->probe(dev); + else + pr_debug("%s:%d: %s no probe method\n", __func__, __LINE__, + dev_name(&dev->core)); + + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); + return result; +} + +static void ps3_system_bus_remove(struct device *_dev) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct ps3_system_bus_driver *drv; + + BUG_ON(!dev); + dev_dbg(_dev, "%s:%d\n", __func__, __LINE__); + + drv = ps3_system_bus_dev_to_system_bus_drv(dev); + BUG_ON(!drv); + + if (drv->remove) + drv->remove(dev); + else + dev_dbg(&dev->core, "%s:%d %s: no remove method\n", + __func__, __LINE__, drv->core.name); + + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core)); +} + +static void ps3_system_bus_shutdown(struct device *_dev) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct ps3_system_bus_driver *drv; + + BUG_ON(!dev); + + dev_dbg(&dev->core, " -> %s:%d: match_id %d\n", __func__, __LINE__, + dev->match_id); + + if (!dev->core.driver) { + dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__, + __LINE__); + return; + } + + drv = ps3_system_bus_dev_to_system_bus_drv(dev); + + BUG_ON(!drv); + + dev_dbg(&dev->core, "%s:%d: %s -> %s\n", __func__, __LINE__, + dev_name(&dev->core), drv->core.name); + + if (drv->shutdown) + drv->shutdown(dev); + else if (drv->remove) { + dev_dbg(&dev->core, "%s:%d %s: no shutdown, calling remove\n", + __func__, __LINE__, drv->core.name); + drv->remove(dev); + } else { + dev_dbg(&dev->core, "%s:%d %s: no shutdown method\n", + __func__, __LINE__, drv->core.name); + BUG(); + } + + dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__); +} + +static int ps3_system_bus_uevent(const struct device *_dev, struct kobj_uevent_env *env) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + + if (add_uevent_var(env, "MODALIAS=ps3:%d:%d", dev->match_id, + dev->match_sub_id)) + return -ENOMEM; + return 0; +} + +static ssize_t modalias_show(struct device *_dev, struct device_attribute *a, + char *buf) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + int len = snprintf(buf, PAGE_SIZE, "ps3:%d:%d\n", dev->match_id, + dev->match_sub_id); + + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *ps3_system_bus_dev_attrs[] = { + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ps3_system_bus_dev); + +static struct bus_type ps3_system_bus_type = { + .name = "ps3_system_bus", + .match = ps3_system_bus_match, + .uevent = ps3_system_bus_uevent, + .probe = ps3_system_bus_probe, + .remove = ps3_system_bus_remove, + .shutdown = ps3_system_bus_shutdown, + .dev_groups = ps3_system_bus_dev_groups, +}; + +static int __init ps3_system_bus_init(void) +{ + int result; + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + pr_debug(" -> %s:%d\n", __func__, __LINE__); + + mutex_init(&usage_hack.mutex); + + result = device_register(&ps3_system_bus); + BUG_ON(result); + + result = bus_register(&ps3_system_bus_type); + BUG_ON(result); + + pr_debug(" <- %s:%d\n", __func__, __LINE__); + return result; +} + +core_initcall(ps3_system_bus_init); + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +static void * ps3_alloc_coherent(struct device *_dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + unsigned long attrs) +{ + int result; + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + unsigned long virt_addr; + + flag &= ~(__GFP_DMA | __GFP_HIGHMEM); + flag |= __GFP_ZERO; + + virt_addr = __get_free_pages(flag, get_order(size)); + + if (!virt_addr) { + pr_debug("%s:%d: get_free_pages failed\n", __func__, __LINE__); + goto clean_none; + } + + result = ps3_dma_map(dev->d_region, virt_addr, size, dma_handle, + CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | + CBE_IOPTE_SO_RW | CBE_IOPTE_M); + + if (result) { + pr_debug("%s:%d: ps3_dma_map failed (%d)\n", + __func__, __LINE__, result); + BUG_ON("check region type"); + goto clean_alloc; + } + + return (void*)virt_addr; + +clean_alloc: + free_pages(virt_addr, get_order(size)); +clean_none: + dma_handle = NULL; + return NULL; +} + +static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + + ps3_dma_unmap(dev->d_region, dma_handle, size); + free_pages((unsigned long)vaddr, get_order(size)); +} + +/* Creates TCEs for a user provided buffer. The user buffer must be + * contiguous real kernel storage (not vmalloc). The address passed here + * comprises a page address and offset into that page. The dma_addr_t + * returned will point to the same byte within the page as was passed in. + */ + +static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction, + unsigned long attrs) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + int result; + dma_addr_t bus_addr; + void *ptr = page_address(page) + offset; + + result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, + &bus_addr, + CBE_IOPTE_PP_R | CBE_IOPTE_PP_W | + CBE_IOPTE_SO_RW | CBE_IOPTE_M); + + if (result) { + pr_debug("%s:%d: ps3_dma_map failed (%d)\n", + __func__, __LINE__, result); + } + + return bus_addr; +} + +static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + unsigned long attrs) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + int result; + dma_addr_t bus_addr; + u64 iopte_flag; + void *ptr = page_address(page) + offset; + + iopte_flag = CBE_IOPTE_M; + switch (direction) { + case DMA_BIDIRECTIONAL: + iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW; + break; + case DMA_TO_DEVICE: + iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_SO_R; + break; + case DMA_FROM_DEVICE: + iopte_flag |= CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW; + break; + default: + /* not happened */ + BUG(); + } + result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, + &bus_addr, iopte_flag); + + if (result) { + pr_debug("%s:%d: ps3_dma_map failed (%d)\n", + __func__, __LINE__, result); + } + return bus_addr; +} + +static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, unsigned long attrs) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + int result; + + result = ps3_dma_unmap(dev->d_region, dma_addr, size); + + if (result) { + pr_debug("%s:%d: ps3_dma_unmap failed (%d)\n", + __func__, __LINE__, result); + } +} + +static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, unsigned long attrs) +{ +#if defined(CONFIG_PS3_DYNAMIC_DMA) + BUG_ON("do"); + return -EPERM; +#else + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + int result = ps3_dma_map(dev->d_region, sg_phys(sg), + sg->length, &sg->dma_address, 0); + + if (result) { + pr_debug("%s:%d: ps3_dma_map failed (%d)\n", + __func__, __LINE__, result); + return -EINVAL; + } + + sg->dma_length = sg->length; + } + + return nents; +#endif +} + +static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg, + int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + BUG(); + return -EINVAL; +} + +static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, unsigned long attrs) +{ +#if defined(CONFIG_PS3_DYNAMIC_DMA) + BUG_ON("do"); +#endif +} + +static void ps3_ioc0_unmap_sg(struct device *_dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + BUG(); +} + +static int ps3_dma_supported(struct device *_dev, u64 mask) +{ + return mask >= DMA_BIT_MASK(32); +} + +static const struct dma_map_ops ps3_sb_dma_ops = { + .alloc = ps3_alloc_coherent, + .free = ps3_free_coherent, + .map_sg = ps3_sb_map_sg, + .unmap_sg = ps3_sb_unmap_sg, + .dma_supported = ps3_dma_supported, + .map_page = ps3_sb_map_page, + .unmap_page = ps3_unmap_page, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, +}; + +static const struct dma_map_ops ps3_ioc0_dma_ops = { + .alloc = ps3_alloc_coherent, + .free = ps3_free_coherent, + .map_sg = ps3_ioc0_map_sg, + .unmap_sg = ps3_ioc0_unmap_sg, + .dma_supported = ps3_dma_supported, + .map_page = ps3_ioc0_map_page, + .unmap_page = ps3_unmap_page, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, +}; + +/** + * ps3_system_bus_release_device - remove a device from the system bus + */ + +static void ps3_system_bus_release_device(struct device *_dev) +{ + struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); + kfree(dev); +} + +/** + * ps3_system_bus_device_register - add a device to the system bus + * + * ps3_system_bus_device_register() expects the dev object to be allocated + * dynamically by the caller. The system bus takes ownership of the dev + * object and frees the object in ps3_system_bus_release_device(). + */ + +int ps3_system_bus_device_register(struct ps3_system_bus_device *dev) +{ + int result; + static unsigned int dev_ioc0_count; + static unsigned int dev_sb_count; + static unsigned int dev_vuart_count; + static unsigned int dev_lpm_count; + + if (!dev->core.parent) + dev->core.parent = &ps3_system_bus; + dev->core.bus = &ps3_system_bus_type; + dev->core.release = ps3_system_bus_release_device; + + switch (dev->dev_type) { + case PS3_DEVICE_TYPE_IOC0: + dev->core.dma_ops = &ps3_ioc0_dma_ops; + dev_set_name(&dev->core, "ioc0_%02x", ++dev_ioc0_count); + break; + case PS3_DEVICE_TYPE_SB: + dev->core.dma_ops = &ps3_sb_dma_ops; + dev_set_name(&dev->core, "sb_%02x", ++dev_sb_count); + + break; + case PS3_DEVICE_TYPE_VUART: + dev_set_name(&dev->core, "vuart_%02x", ++dev_vuart_count); + break; + case PS3_DEVICE_TYPE_LPM: + dev_set_name(&dev->core, "lpm_%02x", ++dev_lpm_count); + break; + default: + BUG(); + } + + dev->core.of_node = NULL; + set_dev_node(&dev->core, 0); + + pr_debug("%s:%d add %s\n", __func__, __LINE__, dev_name(&dev->core)); + + result = device_register(&dev->core); + return result; +} + +EXPORT_SYMBOL_GPL(ps3_system_bus_device_register); + +int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv) +{ + int result; + + pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, drv->core.name); + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + drv->core.bus = &ps3_system_bus_type; + + result = driver_register(&drv->core); + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, drv->core.name); + return result; +} + +EXPORT_SYMBOL_GPL(ps3_system_bus_driver_register); + +void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv) +{ + pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, drv->core.name); + driver_unregister(&drv->core); + pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, drv->core.name); +} + +EXPORT_SYMBOL_GPL(ps3_system_bus_driver_unregister); diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c new file mode 100644 index 0000000000..c9bfc113a9 --- /dev/null +++ b/arch/powerpc/platforms/ps3/time.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PS3 time and rtc routines. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + */ + +#include +#include +#include + +#include +#include +#include + +#include "platform.h" + +void __init ps3_calibrate_decr(void) +{ + int result; + u64 tmp; + + result = ps3_repository_read_be_tb_freq(0, &tmp); + BUG_ON(result); + + ppc_tb_freq = tmp; + ppc_proc_freq = ppc_tb_freq * 40; +} + +static u64 read_rtc(void) +{ + int result; + u64 rtc_val; + u64 tb_val; + + result = lv1_get_rtc(&rtc_val, &tb_val); + BUG_ON(result); + + return rtc_val; +} + +time64_t __init ps3_get_boot_time(void) +{ + return read_rtc() + ps3_os_area_get_rtc_diff(); +} + +static int __init ps3_rtc_init(void) +{ + struct platform_device *pdev; + + if (!firmware_has_feature(FW_FEATURE_PS3_LV1)) + return -ENODEV; + + pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0); + + return PTR_ERR_OR_ZERO(pdev); +} +device_initcall(ps3_rtc_init); diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig new file mode 100644 index 0000000000..4ebf2ef284 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_PSERIES + depends on PPC64 && PPC_BOOK3S + bool "IBM pSeries & new (POWER5-based) iSeries" + select HAVE_PCSPKR_PLATFORM + select MPIC + select OF_DYNAMIC + select FORCE_PCI + select PCI_MSI + select GENERIC_ALLOCATOR + select PPC_XICS + select PPC_XIVE_SPAPR + select PPC_ICP_NATIVE + select PPC_ICP_HV + select PPC_ICS_RTAS + select PPC_I8259 + select PPC_RTAS + select PPC_RTAS_DAEMON + select RTAS_ERROR_LOGGING + select PPC_UDBG_16550 + select PPC_DOORBELL + select HOTPLUG_CPU + select FORCE_SMP + select SWIOTLB + select ARCH_SUPPORTS_PER_VMA_LOCK + default y + +config PARAVIRT + bool + +config PARAVIRT_SPINLOCKS + bool + +config PARAVIRT_TIME_ACCOUNTING + select PARAVIRT + bool + +config PPC_SPLPAR + bool "Support for shared-processor logical partitions" + depends on PPC_PSERIES + select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS + select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN + default y + help + Enabling this option will make the kernel run more efficiently + on logically-partitioned pSeries systems which use shared + processors, that is, which share physical processors between + two or more partitions. + + Say Y if you are unsure. + +config DTL + bool "Dispatch Trace Log" + depends on PPC_SPLPAR && DEBUG_FS + help + SPLPAR machines can log hypervisor preempt & dispatch events to a + kernel buffer. Saying Y here will enable logging these events, + which are accessible through a debugfs file. + + Say N if you are unsure. + +config PSERIES_ENERGY + tristate "pSeries energy management capabilities driver" + depends on PPC_PSERIES + default y + help + Provides interface to platform energy management capabilities + on supported PSERIES platforms. + Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list + and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint + +config IO_EVENT_IRQ + bool "IO Event Interrupt support" + depends on PPC_PSERIES + default y + help + Select this option, if you want to enable support for IO Event + interrupts. IO event interrupt is a mechanism provided by RTAS + to return information about hardware error and non-error events + which may need OS attention. RTAS returns events for multiple + event types and scopes. Device drivers can register their handlers + to receive events. + + This option will only enable the IO event platform code. You + will still need to enable or compile the actual drivers + that use this infrastructure to handle IO event interrupts. + + Say Y if you are unsure. + +config LPARCFG + bool "LPAR Configuration Data" + depends on PPC_PSERIES + help + Provide system capacity information via human readable + = pairs through a /proc/ppc64/lparcfg interface. + +config PPC_PSERIES_DEBUG + depends on PPC_PSERIES && PPC_EARLY_DEBUG + bool "Enable extra debug logging in platforms/pseries" + default y + help + Say Y here if you want the pseries core to produce a bunch of + debug messages to the system log. Select this if you are having a + problem with the pseries core and want to see more of what is + going on. This does not enable debugging in lpar.c, which must + be manually done due to its verbosity. + +config PPC_SMLPAR + bool "Support for shared-memory logical partitions" + depends on PPC_PSERIES + select LPARCFG + help + Select this option to enable shared memory partition support. + With this option a system running in an LPAR can be given more + memory than physically available and will allow firmware to + balance memory across many LPARs. + +config CMM + tristate "Collaborative memory management" + depends on PPC_SMLPAR + select MEMORY_BALLOON + default y + help + Select this option, if you want to enable the kernel interface + to reduce the memory size of the system. This is accomplished + by allocating pages of memory and put them "on hold". This only + makes sense for a system running in an LPAR where the unused pages + will be reused for other LPARs. The interface allows firmware to + balance memory across many LPARs. + +config HV_PERF_CTRS + bool "Hypervisor supplied PMU events (24x7 & GPCI)" + default y + depends on PERF_EVENTS && PPC_PSERIES + help + Enable access to hypervisor supplied counters in perf. Currently, + this enables code that uses the hcall GetPerfCounterInfo and 24x7 + interfaces to retrieve counters. GPCI exists on Power 6 and later + systems. 24x7 is available on Power 8 and later systems. + + If unsure, select Y. + +config IBMVIO + depends on PPC_PSERIES + bool + default y + +config IBMEBUS + depends on PPC_PSERIES && !CPU_LITTLE_ENDIAN + bool "Support for GX bus based adapters" + help + Bus device driver for GX bus based adapters. + +config PSERIES_PLPKS + depends on PPC_PSERIES + select NLS + bool + # PowerVM provides an isolated Platform Keystore (PKS) storage + # allocation for each LPAR with individually managed access + # controls to store sensitive information securely. It can be + # used to store asymmetric public keys or secrets as required + # by different usecases. + # + # This option is selected by in-kernel consumers that require + # access to the PKS. + +config PAPR_SCM + depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM + tristate "Support for the PAPR Storage Class Memory interface" + help + Enable access to hypervisor provided storage class memory. + +config PPC_SVM + bool "Secure virtual machine (SVM) support for POWER" + depends on PPC_PSERIES + select SWIOTLB + select ARCH_HAS_MEM_ENCRYPT + select ARCH_HAS_FORCE_DMA_UNENCRYPTED + select ARCH_HAS_CC_PLATFORM + help + There are certain POWER platforms which support secure guests using + the Protected Execution Facility, with the help of an Ultravisor + executing below the hypervisor layer. This enables support for + those guests. + + If unsure, say "N". diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile new file mode 100644 index 0000000000..53c3b91af2 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG + +obj-y := lpar.o hvCall.o nvram.o reconfig.o \ + of_helpers.o rtas-work-area.o papr-sysparm.o \ + setup.o iommu.o event_sources.o ras.o \ + firmware.o power.o dlpar.o mobility.o rng.o \ + pci.o pci_dlpar.o eeh_pseries.o msi.o \ + papr_platform_attributes.o dtl.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o +obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o + +obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o +obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o + +obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_HVCS) += hvcserver.o +obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o +obj-$(CONFIG_CMM) += cmm.o +obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o +obj-$(CONFIG_LPARCFG) += lparcfg.o +obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_IBMEBUS) += ibmebus.o +obj-$(CONFIG_PAPR_SCM) += papr_scm.o +obj-$(CONFIG_PPC_SPLPAR) += vphn.o +obj-$(CONFIG_PPC_SVM) += svm.o +obj-$(CONFIG_FA_DUMP) += rtas-fadump.o +obj-$(CONFIG_PSERIES_PLPKS) += plpks.o +obj-$(CONFIG_PPC_SECURE_BOOT) += plpks-secvar.o +obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o + +obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o + +# nothing that operates in real mode is safe for KASAN +KASAN_SANITIZE_ras.o := n +KASAN_SANITIZE_kexec.o := n diff --git a/arch/powerpc/platforms/pseries/cc_platform.c b/arch/powerpc/platforms/pseries/cc_platform.c new file mode 100644 index 0000000000..e8021af83a --- /dev/null +++ b/arch/powerpc/platforms/pseries/cc_platform.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#include +#include + +#include +#include + +bool cc_platform_has(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_MEM_ENCRYPT: + return is_secure_guest(); + + default: + return false; + } +} +EXPORT_SYMBOL_GPL(cc_platform_has); diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c new file mode 100644 index 0000000000..5f4037c1d7 --- /dev/null +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -0,0 +1,663 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Collaborative memory management interface. + * + * Copyright (C) 2008 IBM Corporation + * Author(s): Brian King (brking@linux.vnet.ibm.com), + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +#define CMM_DRIVER_VERSION "1.0.0" +#define CMM_DEFAULT_DELAY 1 +#define CMM_HOTPLUG_DELAY 5 +#define CMM_DEBUG 0 +#define CMM_DISABLE 0 +#define CMM_OOM_KB 1024 +#define CMM_MIN_MEM_MB 256 +#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) +#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) + +#define CMM_MEM_HOTPLUG_PRI 1 + +static unsigned int delay = CMM_DEFAULT_DELAY; +static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; +static unsigned int oom_kb = CMM_OOM_KB; +static unsigned int cmm_debug = CMM_DEBUG; +static unsigned int cmm_disabled = CMM_DISABLE; +static unsigned long min_mem_mb = CMM_MIN_MEM_MB; +static bool __read_mostly simulate; +static unsigned long simulate_loan_target_kb; +static struct device cmm_dev; + +MODULE_AUTHOR("Brian King "); +MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(CMM_DRIVER_VERSION); + +module_param_named(delay, delay, uint, 0644); +MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " + "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); +module_param_named(hotplug_delay, hotplug_delay, uint, 0644); +MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove " + "before loaning resumes. " + "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); +module_param_named(oom_kb, oom_kb, uint, 0644); +MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " + "[Default=" __stringify(CMM_OOM_KB) "]"); +module_param_named(min_mem_mb, min_mem_mb, ulong, 0644); +MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " + "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); +module_param_named(debug, cmm_debug, uint, 0644); +MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " + "[Default=" __stringify(CMM_DEBUG) "]"); +module_param_named(simulate, simulate, bool, 0444); +MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw)."); + +#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } + +static atomic_long_t loaned_pages; +static unsigned long loaned_pages_target; +static unsigned long oom_freed_pages; + +static DEFINE_MUTEX(hotplug_mutex); +static int hotplug_occurred; /* protected by the hotplug mutex */ + +static struct task_struct *cmm_thread_ptr; +static struct balloon_dev_info b_dev_info; + +static long plpar_page_set_loaned(struct page *page) +{ + const unsigned long vpa = page_to_phys(page); + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + if (unlikely(simulate)) + return 0; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, + vpa + i - cmo_page_sz, 0); + + return rc; +} + +static long plpar_page_set_active(struct page *page) +{ + const unsigned long vpa = page_to_phys(page); + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + if (unlikely(simulate)) + return 0; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, + vpa + i - cmo_page_sz, 0); + + return rc; +} + +/** + * cmm_alloc_pages - Allocate pages and mark them as loaned + * @nr: number of pages to allocate + * + * Return value: + * number of pages requested to be allocated which were not + **/ +static long cmm_alloc_pages(long nr) +{ + struct page *page; + long rc; + + cmm_dbg("Begin request for %ld pages\n", nr); + + while (nr) { + /* Exit if a hotplug operation is in progress or occurred */ + if (mutex_trylock(&hotplug_mutex)) { + if (hotplug_occurred) { + mutex_unlock(&hotplug_mutex); + break; + } + mutex_unlock(&hotplug_mutex); + } else { + break; + } + + page = balloon_page_alloc(); + if (!page) + break; + rc = plpar_page_set_loaned(page); + if (rc) { + pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc); + __free_page(page); + break; + } + + balloon_page_enqueue(&b_dev_info, page); + atomic_long_inc(&loaned_pages); + adjust_managed_page_count(page, -1); + nr--; + } + + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_free_pages - Free pages and mark them as active + * @nr: number of pages to free + * + * Return value: + * number of pages requested to be freed which were not + **/ +static long cmm_free_pages(long nr) +{ + struct page *page; + + cmm_dbg("Begin free of %ld pages.\n", nr); + while (nr) { + page = balloon_page_dequeue(&b_dev_info); + if (!page) + break; + plpar_page_set_active(page); + adjust_managed_page_count(page, 1); + __free_page(page); + atomic_long_dec(&loaned_pages); + nr--; + } + cmm_dbg("End request with %ld pages unfulfilled\n", nr); + return nr; +} + +/** + * cmm_oom_notify - OOM notifier + * @self: notifier block struct + * @dummy: not used + * @parm: returned - number of pages freed + * + * Return value: + * NOTIFY_OK + **/ +static int cmm_oom_notify(struct notifier_block *self, + unsigned long dummy, void *parm) +{ + unsigned long *freed = parm; + long nr = KB2PAGES(oom_kb); + + cmm_dbg("OOM processing started\n"); + nr = cmm_free_pages(nr); + loaned_pages_target = atomic_long_read(&loaned_pages); + *freed += KB2PAGES(oom_kb) - nr; + oom_freed_pages += KB2PAGES(oom_kb) - nr; + cmm_dbg("OOM processing complete\n"); + return NOTIFY_OK; +} + +/** + * cmm_get_mpp - Read memory performance parameters + * + * Makes hcall to query the current page loan request from the hypervisor. + * + * Return value: + * nothing + **/ +static void cmm_get_mpp(void) +{ + const long __loaned_pages = atomic_long_read(&loaned_pages); + const long total_pages = totalram_pages() + __loaned_pages; + int rc; + struct hvcall_mpp_data mpp_data; + signed long active_pages_target, page_loan_request, target; + signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; + + if (likely(!simulate)) { + rc = h_get_mpp(&mpp_data); + if (rc != H_SUCCESS) + return; + page_loan_request = div_s64((s64)mpp_data.loan_request, + PAGE_SIZE); + target = page_loan_request + __loaned_pages; + } else { + target = KB2PAGES(simulate_loan_target_kb); + page_loan_request = target - __loaned_pages; + } + + if (target < 0 || total_pages < min_mem_pages) + target = 0; + + if (target > oom_freed_pages) + target -= oom_freed_pages; + else + target = 0; + + active_pages_target = total_pages - target; + + if (min_mem_pages > active_pages_target) + target = total_pages - min_mem_pages; + + if (target < 0) + target = 0; + + loaned_pages_target = target; + + cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", + page_loan_request, __loaned_pages, loaned_pages_target, + oom_freed_pages, totalram_pages()); +} + +static struct notifier_block cmm_oom_nb = { + .notifier_call = cmm_oom_notify +}; + +/** + * cmm_thread - CMM task thread + * @dummy: not used + * + * Return value: + * 0 + **/ +static int cmm_thread(void *dummy) +{ + unsigned long timeleft; + long __loaned_pages; + + while (1) { + timeleft = msleep_interruptible(delay * 1000); + + if (kthread_should_stop() || timeleft) + break; + + if (mutex_trylock(&hotplug_mutex)) { + if (hotplug_occurred) { + hotplug_occurred = 0; + mutex_unlock(&hotplug_mutex); + cmm_dbg("Hotplug operation has occurred, " + "loaning activity suspended " + "for %d seconds.\n", + hotplug_delay); + timeleft = msleep_interruptible(hotplug_delay * + 1000); + if (kthread_should_stop() || timeleft) + break; + continue; + } + mutex_unlock(&hotplug_mutex); + } else { + cmm_dbg("Hotplug operation in progress, activity " + "suspended\n"); + continue; + } + + cmm_get_mpp(); + + __loaned_pages = atomic_long_read(&loaned_pages); + if (loaned_pages_target > __loaned_pages) { + if (cmm_alloc_pages(loaned_pages_target - __loaned_pages)) + loaned_pages_target = __loaned_pages; + } else if (loaned_pages_target < __loaned_pages) + cmm_free_pages(__loaned_pages - loaned_pages_target); + } + return 0; +} + +#define CMM_SHOW(name, format, args...) \ + static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, 0444, show_##name, NULL) + +CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages))); +CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); + +static ssize_t show_oom_pages(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); +} + +static ssize_t store_oom_pages(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul (buf, NULL, 10); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (val != 0) + return -EBADMSG; + + oom_freed_pages = 0; + return count; +} + +static DEVICE_ATTR(oom_freed_kb, 0644, + show_oom_pages, store_oom_pages); + +static struct device_attribute *cmm_attrs[] = { + &dev_attr_loaned_kb, + &dev_attr_loaned_target_kb, + &dev_attr_oom_freed_kb, +}; + +static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644, + simulate_loan_target_kb); + +static struct bus_type cmm_subsys = { + .name = "cmm", + .dev_name = "cmm", +}; + +static void cmm_release_device(struct device *dev) +{ +} + +/** + * cmm_sysfs_register - Register with sysfs + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_sysfs_register(struct device *dev) +{ + int i, rc; + + if ((rc = subsys_system_register(&cmm_subsys, NULL))) + return rc; + + dev->id = 0; + dev->bus = &cmm_subsys; + dev->release = cmm_release_device; + + if ((rc = device_register(dev))) + goto subsys_unregister; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { + if ((rc = device_create_file(dev, cmm_attrs[i]))) + goto fail; + } + + if (!simulate) + return 0; + rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr); + if (rc) + goto fail; + return 0; + +fail: + while (--i >= 0) + device_remove_file(dev, cmm_attrs[i]); + device_unregister(dev); +subsys_unregister: + bus_unregister(&cmm_subsys); + return rc; +} + +/** + * cmm_unregister_sysfs - Unregister from sysfs + * + **/ +static void cmm_unregister_sysfs(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) + device_remove_file(dev, cmm_attrs[i]); + device_unregister(dev); + bus_unregister(&cmm_subsys); +} + +/** + * cmm_reboot_notifier - Make sure pages are not still marked as "loaned" + * + **/ +static int cmm_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + cmm_thread_ptr = NULL; + cmm_free_pages(atomic_long_read(&loaned_pages)); + } + return NOTIFY_DONE; +} + +static struct notifier_block cmm_reboot_nb = { + .notifier_call = cmm_reboot_notifier, +}; + +/** + * cmm_memory_cb - Handle memory hotplug notifier calls + * @self: notifier block struct + * @action: action to take + * @arg: struct memory_notify data for handler + * + * Return value: + * NOTIFY_OK or notifier error based on subfunction return value + * + **/ +static int cmm_memory_cb(struct notifier_block *self, + unsigned long action, void *arg) +{ + switch (action) { + case MEM_GOING_OFFLINE: + mutex_lock(&hotplug_mutex); + hotplug_occurred = 1; + break; + case MEM_OFFLINE: + case MEM_CANCEL_OFFLINE: + mutex_unlock(&hotplug_mutex); + cmm_dbg("Memory offline operation complete.\n"); + break; + case MEM_GOING_ONLINE: + case MEM_ONLINE: + case MEM_CANCEL_ONLINE: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block cmm_mem_nb = { + .notifier_call = cmm_memory_cb, + .priority = CMM_MEM_HOTPLUG_PRI +}; + +#ifdef CONFIG_BALLOON_COMPACTION +static int cmm_migratepage(struct balloon_dev_info *b_dev_info, + struct page *newpage, struct page *page, + enum migrate_mode mode) +{ + unsigned long flags; + + /* + * loan/"inflate" the newpage first. + * + * We might race against the cmm_thread who might discover after our + * loan request that another page is to be unloaned. However, once + * the cmm_thread runs again later, this error will automatically + * be corrected. + */ + if (plpar_page_set_loaned(newpage)) { + /* Unlikely, but possible. Tell the caller not to retry now. */ + pr_err_ratelimited("%s: Cannot set page to loaned.", __func__); + return -EBUSY; + } + + /* balloon page list reference */ + get_page(newpage); + + /* + * When we migrate a page to a different zone, we have to fixup the + * count of both involved zones as we adjusted the managed page count + * when inflating. + */ + if (page_zone(page) != page_zone(newpage)) { + adjust_managed_page_count(page, 1); + adjust_managed_page_count(newpage, -1); + } + + spin_lock_irqsave(&b_dev_info->pages_lock, flags); + balloon_page_insert(b_dev_info, newpage); + balloon_page_delete(page); + b_dev_info->isolated_pages--; + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); + + /* + * activate/"deflate" the old page. We ignore any errors just like the + * other callers. + */ + plpar_page_set_active(page); + + /* balloon page list reference */ + put_page(page); + + return MIGRATEPAGE_SUCCESS; +} + +static void cmm_balloon_compaction_init(void) +{ + balloon_devinfo_init(&b_dev_info); + b_dev_info.migratepage = cmm_migratepage; +} +#else /* CONFIG_BALLOON_COMPACTION */ +static void cmm_balloon_compaction_init(void) +{ +} +#endif /* CONFIG_BALLOON_COMPACTION */ + +/** + * cmm_init - Module initialization + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_init(void) +{ + int rc; + + if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) + return -EOPNOTSUPP; + + cmm_balloon_compaction_init(); + + rc = register_oom_notifier(&cmm_oom_nb); + if (rc < 0) + goto out_balloon_compaction; + + if ((rc = register_reboot_notifier(&cmm_reboot_nb))) + goto out_oom_notifier; + + if ((rc = cmm_sysfs_register(&cmm_dev))) + goto out_reboot_notifier; + + rc = register_memory_notifier(&cmm_mem_nb); + if (rc) + goto out_unregister_notifier; + + if (cmm_disabled) + return 0; + + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) { + rc = PTR_ERR(cmm_thread_ptr); + goto out_unregister_notifier; + } + + return 0; +out_unregister_notifier: + unregister_memory_notifier(&cmm_mem_nb); + cmm_unregister_sysfs(&cmm_dev); +out_reboot_notifier: + unregister_reboot_notifier(&cmm_reboot_nb); +out_oom_notifier: + unregister_oom_notifier(&cmm_oom_nb); +out_balloon_compaction: + return rc; +} + +/** + * cmm_exit - Module exit + * + * Return value: + * nothing + **/ +static void cmm_exit(void) +{ + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + unregister_oom_notifier(&cmm_oom_nb); + unregister_reboot_notifier(&cmm_reboot_nb); + unregister_memory_notifier(&cmm_mem_nb); + cmm_free_pages(atomic_long_read(&loaned_pages)); + cmm_unregister_sysfs(&cmm_dev); +} + +/** + * cmm_set_disable - Disable/Enable CMM + * + * Return value: + * 0 on success / other on failure + **/ +static int cmm_set_disable(const char *val, const struct kernel_param *kp) +{ + int disable = simple_strtoul(val, NULL, 10); + + if (disable != 0 && disable != 1) + return -EINVAL; + + if (disable && !cmm_disabled) { + if (cmm_thread_ptr) + kthread_stop(cmm_thread_ptr); + cmm_thread_ptr = NULL; + cmm_free_pages(atomic_long_read(&loaned_pages)); + } else if (!disable && cmm_disabled) { + cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); + if (IS_ERR(cmm_thread_ptr)) + return PTR_ERR(cmm_thread_ptr); + } + + cmm_disabled = disable; + return 0; +} + +module_param_call(disable, cmm_set_disable, param_get_uint, + &cmm_disabled, 0644); +MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " + "[Default=" __stringify(CMM_DISABLE) "]"); + +module_init(cmm_init); +module_exit(cmm_exit); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c new file mode 100644 index 0000000000..47f8eabd1b --- /dev/null +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -0,0 +1,583 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for dynamic reconfiguration for PCI, Memory, and CPU + * Hotplug and Dynamic Logical Partitioning on RPA platforms. + * + * Copyright (C) 2009 Nathan Fontenot + * Copyright (C) 2009 IBM Corporation + */ + +#define pr_fmt(fmt) "dlpar: " fmt + +#include +#include +#include +#include +#include +#include + +#include "of_helpers.h" +#include "pseries.h" + +#include +#include +#include +#include + +static struct workqueue_struct *pseries_hp_wq; + +struct pseries_hp_work { + struct work_struct work; + struct pseries_hp_errorlog *errlog; +}; + +struct cc_workarea { + __be32 drc_index; + __be32 zero; + __be32 name_offset; + __be32 prop_length; + __be32 prop_offset; +}; + +void dlpar_free_cc_property(struct property *prop) +{ + kfree(prop->name); + kfree(prop->value); + kfree(prop); +} + +static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) +{ + struct property *prop; + char *name; + char *value; + + prop = kzalloc(sizeof(*prop), GFP_KERNEL); + if (!prop) + return NULL; + + name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); + prop->name = kstrdup(name, GFP_KERNEL); + if (!prop->name) { + dlpar_free_cc_property(prop); + return NULL; + } + + prop->length = be32_to_cpu(ccwa->prop_length); + value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset); + prop->value = kmemdup(value, prop->length, GFP_KERNEL); + if (!prop->value) { + dlpar_free_cc_property(prop); + return NULL; + } + + return prop; +} + +static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa) +{ + struct device_node *dn; + const char *name; + + dn = kzalloc(sizeof(*dn), GFP_KERNEL); + if (!dn) + return NULL; + + name = (const char *)ccwa + be32_to_cpu(ccwa->name_offset); + dn->full_name = kstrdup(name, GFP_KERNEL); + if (!dn->full_name) { + kfree(dn); + return NULL; + } + + of_node_set_flag(dn, OF_DYNAMIC); + of_node_init(dn); + + return dn; +} + +static void dlpar_free_one_cc_node(struct device_node *dn) +{ + struct property *prop; + + while (dn->properties) { + prop = dn->properties; + dn->properties = prop->next; + dlpar_free_cc_property(prop); + } + + kfree(dn->full_name); + kfree(dn); +} + +void dlpar_free_cc_nodes(struct device_node *dn) +{ + if (dn->child) + dlpar_free_cc_nodes(dn->child); + + if (dn->sibling) + dlpar_free_cc_nodes(dn->sibling); + + dlpar_free_one_cc_node(dn); +} + +#define COMPLETE 0 +#define NEXT_SIBLING 1 +#define NEXT_CHILD 2 +#define NEXT_PROPERTY 3 +#define PREV_PARENT 4 +#define MORE_MEMORY 5 +#define ERR_CFG_USE -9003 + +struct device_node *dlpar_configure_connector(__be32 drc_index, + struct device_node *parent) +{ + struct device_node *dn; + struct device_node *first_dn = NULL; + struct device_node *last_dn = NULL; + struct property *property; + struct property *last_property = NULL; + struct cc_workarea *ccwa; + struct rtas_work_area *work_area; + char *data_buf; + int cc_token; + int rc = -1; + + cc_token = rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); + if (cc_token == RTAS_UNKNOWN_SERVICE) + return NULL; + + work_area = rtas_work_area_alloc(SZ_4K); + data_buf = rtas_work_area_raw_buf(work_area); + + ccwa = (struct cc_workarea *)&data_buf[0]; + ccwa->drc_index = drc_index; + ccwa->zero = 0; + + do { + do { + rc = rtas_call(cc_token, 2, 1, NULL, + rtas_work_area_phys(work_area), NULL); + } while (rtas_busy_delay(rc)); + + switch (rc) { + case COMPLETE: + break; + + case NEXT_SIBLING: + dn = dlpar_parse_cc_node(ccwa); + if (!dn) + goto cc_error; + + dn->parent = last_dn->parent; + last_dn->sibling = dn; + last_dn = dn; + break; + + case NEXT_CHILD: + dn = dlpar_parse_cc_node(ccwa); + if (!dn) + goto cc_error; + + if (!first_dn) { + dn->parent = parent; + first_dn = dn; + } else { + dn->parent = last_dn; + if (last_dn) + last_dn->child = dn; + } + + last_dn = dn; + break; + + case NEXT_PROPERTY: + property = dlpar_parse_cc_property(ccwa); + if (!property) + goto cc_error; + + if (!last_dn->properties) + last_dn->properties = property; + else + last_property->next = property; + + last_property = property; + break; + + case PREV_PARENT: + last_dn = last_dn->parent; + break; + + case MORE_MEMORY: + case ERR_CFG_USE: + default: + printk(KERN_ERR "Unexpected Error (%d) " + "returned from configure-connector\n", rc); + goto cc_error; + } + } while (rc); + +cc_error: + rtas_work_area_free(work_area); + + if (rc) { + if (first_dn) + dlpar_free_cc_nodes(first_dn); + + return NULL; + } + + return first_dn; +} + +int dlpar_attach_node(struct device_node *dn, struct device_node *parent) +{ + int rc; + + dn->parent = parent; + + rc = of_attach_node(dn); + if (rc) { + printk(KERN_ERR "Failed to add device node %pOF\n", dn); + return rc; + } + + return 0; +} + +int dlpar_detach_node(struct device_node *dn) +{ + struct device_node *child; + int rc; + + child = of_get_next_child(dn, NULL); + while (child) { + dlpar_detach_node(child); + child = of_get_next_child(dn, child); + } + + rc = of_detach_node(dn); + if (rc) + return rc; + + of_node_put(dn); + + return 0; +} + +#define DR_ENTITY_SENSE 9003 +#define DR_ENTITY_PRESENT 1 +#define DR_ENTITY_UNUSABLE 2 +#define ALLOCATION_STATE 9003 +#define ALLOC_UNUSABLE 0 +#define ALLOC_USABLE 1 +#define ISOLATION_STATE 9001 +#define ISOLATE 0 +#define UNISOLATE 1 + +int dlpar_acquire_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); + if (rc || dr_status != DR_ENTITY_UNUSABLE) + return -1; + + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE); + if (rc) + return rc; + + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + if (rc) { + rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE); + return rc; + } + + return 0; +} + +int dlpar_release_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE); + if (rc) + return rc; + + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE); + if (rc) { + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + return rc; + } + + return 0; +} + +int dlpar_unisolate_drc(u32 drc_index) +{ + int dr_status, rc; + + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE); + + return 0; +} + +int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) +{ + int rc; + + /* pseries error logs are in BE format, convert to cpu type */ + switch (hp_elog->id_type) { + case PSERIES_HP_ELOG_ID_DRC_COUNT: + hp_elog->_drc_u.drc_count = + be32_to_cpu(hp_elog->_drc_u.drc_count); + break; + case PSERIES_HP_ELOG_ID_DRC_INDEX: + hp_elog->_drc_u.drc_index = + be32_to_cpu(hp_elog->_drc_u.drc_index); + break; + case PSERIES_HP_ELOG_ID_DRC_IC: + hp_elog->_drc_u.ic.count = + be32_to_cpu(hp_elog->_drc_u.ic.count); + hp_elog->_drc_u.ic.index = + be32_to_cpu(hp_elog->_drc_u.ic.index); + } + + switch (hp_elog->resource) { + case PSERIES_HP_ELOG_RESOURCE_MEM: + rc = dlpar_memory(hp_elog); + break; + case PSERIES_HP_ELOG_RESOURCE_CPU: + rc = dlpar_cpu(hp_elog); + break; + case PSERIES_HP_ELOG_RESOURCE_PMEM: + rc = dlpar_hp_pmem(hp_elog); + break; + + default: + pr_warn_ratelimited("Invalid resource (%d) specified\n", + hp_elog->resource); + rc = -EINVAL; + } + + return rc; +} + +static void pseries_hp_work_fn(struct work_struct *work) +{ + struct pseries_hp_work *hp_work = + container_of(work, struct pseries_hp_work, work); + + handle_dlpar_errorlog(hp_work->errlog); + + kfree(hp_work->errlog); + kfree(work); +} + +void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog) +{ + struct pseries_hp_work *work; + struct pseries_hp_errorlog *hp_errlog_copy; + + hp_errlog_copy = kmemdup(hp_errlog, sizeof(*hp_errlog), GFP_ATOMIC); + if (!hp_errlog_copy) + return; + + work = kmalloc(sizeof(struct pseries_hp_work), GFP_ATOMIC); + if (work) { + INIT_WORK((struct work_struct *)work, pseries_hp_work_fn); + work->errlog = hp_errlog_copy; + queue_work(pseries_hp_wq, (struct work_struct *)work); + } else { + kfree(hp_errlog_copy); + } +} + +static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "memory")) { + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM; + } else if (sysfs_streq(arg, "cpu")) { + hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU; + } else { + pr_err("Invalid resource specified.\n"); + return -EINVAL; + } + + return 0; +} + +static int dlpar_parse_action(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "add")) { + hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD; + } else if (sysfs_streq(arg, "remove")) { + hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE; + } else { + pr_err("Invalid action specified.\n"); + return -EINVAL; + } + + return 0; +} + +static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog) +{ + char *arg; + u32 count, index; + + arg = strsep(cmd, " "); + if (!arg) + return -EINVAL; + + if (sysfs_streq(arg, "indexed-count")) { + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_IC; + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC count specified.\n"); + return -EINVAL; + } + + if (kstrtou32(arg, 0, &count)) { + pr_err("Invalid DRC count specified.\n"); + return -EINVAL; + } + + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC Index specified.\n"); + return -EINVAL; + } + + if (kstrtou32(arg, 0, &index)) { + pr_err("Invalid DRC Index specified.\n"); + return -EINVAL; + } + + hp_elog->_drc_u.ic.count = cpu_to_be32(count); + hp_elog->_drc_u.ic.index = cpu_to_be32(index); + } else if (sysfs_streq(arg, "index")) { + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX; + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC Index specified.\n"); + return -EINVAL; + } + + if (kstrtou32(arg, 0, &index)) { + pr_err("Invalid DRC Index specified.\n"); + return -EINVAL; + } + + hp_elog->_drc_u.drc_index = cpu_to_be32(index); + } else if (sysfs_streq(arg, "count")) { + hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT; + arg = strsep(cmd, " "); + if (!arg) { + pr_err("No DRC count specified.\n"); + return -EINVAL; + } + + if (kstrtou32(arg, 0, &count)) { + pr_err("Invalid DRC count specified.\n"); + return -EINVAL; + } + + hp_elog->_drc_u.drc_count = cpu_to_be32(count); + } else { + pr_err("Invalid id_type specified.\n"); + return -EINVAL; + } + + return 0; +} + +static ssize_t dlpar_store(const struct class *class, const struct class_attribute *attr, + const char *buf, size_t count) +{ + struct pseries_hp_errorlog hp_elog; + char *argbuf; + char *args; + int rc; + + args = argbuf = kstrdup(buf, GFP_KERNEL); + if (!argbuf) + return -ENOMEM; + + /* + * Parse out the request from the user, this will be in the form: + * + */ + rc = dlpar_parse_resource(&args, &hp_elog); + if (rc) + goto dlpar_store_out; + + rc = dlpar_parse_action(&args, &hp_elog); + if (rc) + goto dlpar_store_out; + + rc = dlpar_parse_id_type(&args, &hp_elog); + if (rc) + goto dlpar_store_out; + + rc = handle_dlpar_errorlog(&hp_elog); + +dlpar_store_out: + kfree(argbuf); + + if (rc) + pr_err("Could not handle DLPAR request \"%s\"\n", buf); + + return rc ? rc : count; +} + +static ssize_t dlpar_show(const struct class *class, const struct class_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", "memory,cpu"); +} + +static CLASS_ATTR_RW(dlpar); + +int __init dlpar_workqueue_init(void) +{ + if (pseries_hp_wq) + return 0; + + pseries_hp_wq = alloc_ordered_workqueue("pseries hotplug workqueue", 0); + + return pseries_hp_wq ? 0 : -ENOMEM; +} + +static int __init dlpar_sysfs_init(void) +{ + int rc; + + rc = dlpar_workqueue_init(); + if (rc) + return rc; + + return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); +} +machine_device_initcall(pseries, dlpar_sysfs_init); + diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c new file mode 100644 index 0000000000..3f1cdccebc --- /dev/null +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Virtual Processor Dispatch Trace Log + * + * (C) Copyright IBM Corporation 2009 + * + * Author: Jeremy Kerr + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DTL +struct dtl { + struct dtl_entry *buf; + int cpu; + int buf_entries; + u64 last_idx; + spinlock_t lock; +}; +static DEFINE_PER_CPU(struct dtl, cpu_dtl); + +static u8 dtl_event_mask = DTL_LOG_ALL; + + +/* + * Size of per-cpu log buffers. Firmware requires that the buffer does + * not cross a 4k boundary. + */ +static int dtl_buf_entries = N_DISPATCH_LOG; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + +/* + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. + */ +static void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + +struct dtl_ring { + u64 write_index; + struct dtl_entry *write_ptr; + struct dtl_entry *buf; + struct dtl_entry *buf_end; +}; + +static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); + +static atomic_t dtl_count; + +/* + * The cpu accounting code controls the DTL ring buffer, and we get + * given entries as they are processed. + */ +static void consume_dtle(struct dtl_entry *dtle, u64 index) +{ + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); + struct dtl_entry *wp = dtlr->write_ptr; + struct lppaca *vpa = local_paca->lppaca_ptr; + + if (!wp) + return; + + *wp = *dtle; + barrier(); + + /* check for hypervisor ring buffer overflow, ignore this entry if so */ + if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) + return; + + ++wp; + if (wp == dtlr->buf_end) + wp = dtlr->buf; + dtlr->write_ptr = wp; + + /* incrementing write_index makes the new entry visible */ + smp_wmb(); + ++dtlr->write_index; +} + +static int dtl_start(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->buf = dtl->buf; + dtlr->buf_end = dtl->buf + dtl->buf_entries; + dtlr->write_index = 0; + + /* setting write_ptr enables logging into our buffer */ + smp_wmb(); + dtlr->write_ptr = dtl->buf; + + /* enable event logging */ + lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; + + dtl_consumer = consume_dtle; + atomic_inc(&dtl_count); + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->write_ptr = NULL; + smp_wmb(); + + dtlr->buf = NULL; + + /* restore dtl_enable_mask */ + lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT; + + if (atomic_dec_and_test(&dtl_count)) + dtl_consumer = NULL; +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return per_cpu(dtl_rings, dtl->cpu).write_index; +} + +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int dtl_start(struct dtl *dtl) +{ + unsigned long addr; + int ret, hwcpu; + + /* Register our dtl buffer with the hypervisor. The HV expects the + * buffer size to be passed in the second word of the buffer */ + ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES); + + hwcpu = get_hard_smp_processor_id(dtl->cpu); + addr = __pa(dtl->buf); + ret = register_dtl(hwcpu, addr); + if (ret) { + printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " + "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); + return -EIO; + } + + /* set our initial buffer indices */ + lppaca_of(dtl->cpu).dtl_idx = 0; + + /* ensure that our updates to the lppaca fields have occurred before + * we actually enable the logging */ + smp_wmb(); + + /* enable event logging */ + lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask; + + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + int hwcpu = get_hard_smp_processor_id(dtl->cpu); + + lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; + + unregister_dtl(hwcpu); +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx); +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int dtl_enable(struct dtl *dtl) +{ + long int n_entries; + long int rc; + struct dtl_entry *buf = NULL; + + if (!dtl_cache) + return -ENOMEM; + + /* only allow one reader */ + if (dtl->buf) + return -EBUSY; + + /* ensure there are no other conflicting dtl users */ + if (!read_trylock(&dtl_access_lock)) + return -EBUSY; + + n_entries = dtl_buf_entries; + buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); + if (!buf) { + printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", + __func__, dtl->cpu); + read_unlock(&dtl_access_lock); + return -ENOMEM; + } + + spin_lock(&dtl->lock); + rc = -EBUSY; + if (!dtl->buf) { + /* store the original allocation size for use during read */ + dtl->buf_entries = n_entries; + dtl->buf = buf; + dtl->last_idx = 0; + rc = dtl_start(dtl); + if (rc) + dtl->buf = NULL; + } + spin_unlock(&dtl->lock); + + if (rc) { + read_unlock(&dtl_access_lock); + kmem_cache_free(dtl_cache, buf); + } + + return rc; +} + +static void dtl_disable(struct dtl *dtl) +{ + spin_lock(&dtl->lock); + dtl_stop(dtl); + kmem_cache_free(dtl_cache, dtl->buf); + dtl->buf = NULL; + dtl->buf_entries = 0; + spin_unlock(&dtl->lock); + read_unlock(&dtl_access_lock); +} + +/* file interface */ + +static int dtl_file_open(struct inode *inode, struct file *filp) +{ + struct dtl *dtl = inode->i_private; + int rc; + + rc = dtl_enable(dtl); + if (rc) + return rc; + + filp->private_data = dtl; + return 0; +} + +static int dtl_file_release(struct inode *inode, struct file *filp) +{ + struct dtl *dtl = inode->i_private; + dtl_disable(dtl); + return 0; +} + +static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + long int rc, n_read, n_req, read_size; + struct dtl *dtl; + u64 cur_idx, last_idx, i; + + if ((len % sizeof(struct dtl_entry)) != 0) + return -EINVAL; + + dtl = filp->private_data; + + /* requested number of entries to read */ + n_req = len / sizeof(struct dtl_entry); + + /* actual number of entries read */ + n_read = 0; + + spin_lock(&dtl->lock); + + cur_idx = dtl_current_index(dtl); + last_idx = dtl->last_idx; + + if (last_idx + dtl->buf_entries <= cur_idx) + last_idx = cur_idx - dtl->buf_entries + 1; + + if (last_idx + n_req > cur_idx) + n_req = cur_idx - last_idx; + + if (n_req > 0) + dtl->last_idx = last_idx + n_req; + + spin_unlock(&dtl->lock); + + if (n_req <= 0) + return 0; + + i = last_idx % dtl->buf_entries; + + /* read the tail of the buffer if we've wrapped */ + if (i + n_req > dtl->buf_entries) { + read_size = dtl->buf_entries - i; + + rc = copy_to_user(buf, &dtl->buf[i], + read_size * sizeof(struct dtl_entry)); + if (rc) + return -EFAULT; + + i = 0; + n_req -= read_size; + n_read += read_size; + buf += read_size * sizeof(struct dtl_entry); + } + + /* .. and now the head */ + rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry)); + if (rc) + return -EFAULT; + + n_read += n_req; + + return n_read * sizeof(struct dtl_entry); +} + +static const struct file_operations dtl_fops = { + .open = dtl_file_open, + .release = dtl_file_release, + .read = dtl_file_read, + .llseek = no_llseek, +}; + +static struct dentry *dtl_dir; + +static void dtl_setup_file(struct dtl *dtl) +{ + char name[10]; + + sprintf(name, "cpu-%d", dtl->cpu); + + debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops); +} + +static int dtl_init(void) +{ + int i; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -ENODEV; + + /* set up common debugfs structure */ + + dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir); + + debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask); + debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries); + + /* set up the per-cpu log structures */ + for_each_possible_cpu(i) { + struct dtl *dtl = &per_cpu(cpu_dtl, i); + spin_lock_init(&dtl->lock); + dtl->cpu = i; + + dtl_setup_file(dtl); + } + + return 0; +} +machine_arch_initcall(pseries, dtl_init); +#endif /* CONFIG_DTL */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +/* + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. + */ +static notrace u64 scan_dispatch_log(u64 stop_tb) +{ + u64 i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (!dtl) + return 0; + + if (i == be64_to_cpu(vpa->dtl_idx)) + return 0; + while (i < be64_to_cpu(vpa->dtl_idx)) { + dtb = be64_to_cpu(dtl->timebase); + tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + + be32_to_cpu(dtl->ready_to_enqueue_time); + barrier(); + if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { + /* buffer has overflowed */ + i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; +#ifdef CONFIG_DTL + if (dtl_consumer) + dtl_consumer(dtl, i); +#endif + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; +} + +/* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void notrace pseries_accumulate_stolen_time(void) +{ + u64 sst, ust; + struct cpu_accounting_data *acct = &local_paca->accounting; + + sst = scan_dispatch_log(acct->starttime_user); + ust = scan_dispatch_log(acct->starttime); + acct->stime -= sst; + acct->utime -= ust; + acct->steal_time += ust + sst; +} + +u64 pseries_calculate_stolen_time(u64 stop_tb) +{ + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) + return scan_dispatch_log(stop_tb); + + return 0; +} + +#endif diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c new file mode 100644 index 0000000000..def184da51 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -0,0 +1,887 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The file intends to implement the platform dependent EEH operations on pseries. + * Actually, the pseries platform is built based on RTAS heavily. That means the + * pseries platform dependent EEH operations will be built on RTAS calls. The functions + * are derived from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has + * been done. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011. + * Copyright IBM Corporation 2001, 2005, 2006 + * Copyright Dave Engebretsen & Todd Inglett 2001 + * Copyright Linas Vepstas 2005, 2006 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* RTAS tokens */ +static int ibm_set_eeh_option; +static int ibm_set_slot_reset; +static int ibm_read_slot_reset_state; +static int ibm_read_slot_reset_state2; +static int ibm_slot_error_detail; +static int ibm_get_config_addr_info; +static int ibm_get_config_addr_info2; +static int ibm_configure_pe; + +static void pseries_eeh_init_edev(struct pci_dn *pdn); + +static void pseries_pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (eeh_has_flag(EEH_FORCE_DISABLED)) + return; + + dev_dbg(&pdev->dev, "EEH: Setting up device\n"); +#ifdef CONFIG_PCI_IOV + if (pdev->is_virtfn) { + pdn->device_id = pdev->device; + pdn->vendor_id = pdev->vendor; + pdn->class_code = pdev->class; + /* + * Last allow unfreeze return code used for retrieval + * by user space in eeh-sysfs to show the last command + * completion from platform. + */ + pdn->last_allow_rc = 0; + } +#endif + pseries_eeh_init_edev(pdn); +#ifdef CONFIG_PCI_IOV + if (pdev->is_virtfn) { + /* + * FIXME: This really should be handled by choosing the right + * parent PE in pseries_eeh_init_edev(). + */ + struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe; + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + + edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8); + eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */ + eeh_pe_tree_insert(edev, physfn_pe); /* Add as VF PE type */ + } +#endif + eeh_probe_device(pdev); +} + + +/** + * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device + * @pdn: pci_dn of the input device + * + * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo, + * pe_config_addr) as a handle to a given PE. This function finds the + * pe_config_addr based on the device's config addr. + * + * Keep in mind that the pe_config_addr *might* be numerically identical to the + * device's config addr, but the two are conceptually distinct. + * + * Returns the pe_config_addr, or a negative error code. + */ +static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn) +{ + int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + struct pci_controller *phb = pdn->phb; + int ret, rets[3]; + + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { + /* + * First of all, use function 1 to determine if this device is + * part of a PE or not. ret[0] being zero indicates it's not. + */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 1); + if (ret || (rets[0] == 0)) + return -ENOENT; + + /* Retrieve the associated PE config address with function 0 */ + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return -ENXIO; + } + + return rets[0]; + } + + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), 0); + if (ret) { + pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n", + __func__, phb->global_number, config_addr); + return -ENXIO; + } + + return rets[0]; + } + + /* + * PAPR does describe a process for finding the pe_config_addr that was + * used before the ibm,get-config-addr-info calls were added. However, + * I haven't found *any* systems that don't have that RTAS call + * implemented. If you happen to find one that needs the old DT based + * process, patches are welcome! + */ + return -ENOENT; +} + +/** + * pseries_eeh_phb_reset - Reset the specified PHB + * @phb: PCI controller + * @config_addr: the associated config address + * @option: reset option + * + * Reset the specified PHB/PE + */ +static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option) +{ + int ret; + + /* Reset PE through RTAS call */ + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + + /* If fundamental-reset not supported, try hot-reset */ + if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { + option = EEH_RESET_HOT; + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid), option); + } + + /* We need reset hold or settlement delay */ + if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) + msleep(EEH_PE_RST_HOLD_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + + return ret; +} + +/** + * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE + * @phb: PCI controller + * @config_addr: the associated config address + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr) +{ + int ret; + /* Waiting 0.2s maximum before skipping configuration */ + int max_wait = 200; + + while (max_wait > 0) { + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, + config_addr, BUID_HI(phb->buid), + BUID_LO(phb->buid)); + + if (!ret) + return ret; + if (ret < 0) + break; + + /* + * If RTAS returns a delay value that's above 100ms, cut it + * down to 100ms in case firmware made a mistake. For more + * on how these delay values work see rtas_busy_delay_time + */ + if (ret > RTAS_EXTENDED_DELAY_MIN+2 && + ret <= RTAS_EXTENDED_DELAY_MAX) + ret = RTAS_EXTENDED_DELAY_MIN+2; + + max_wait -= rtas_busy_delay_time(ret); + + if (max_wait < 0) + break; + + rtas_busy_delay(ret); + } + + pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n", + __func__, phb->global_number, config_addr, ret); + /* PAPR defines -3 as "Parameter Error" for this function: */ + if (ret == -3) + return -EINVAL; + else + return -EIO; +} + +/* + * Buffer for reporting slot-error-detail rtas calls. Its here + * in BSS, and not dynamically alloced, so that it ends up in + * RMO where RTAS can access it. + */ +static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(slot_errbuf_lock); +static int eeh_error_buf_size; + +static int pseries_eeh_cap_start(struct pci_dn *pdn) +{ + u32 status; + + if (!pdn) + return 0; + + rtas_read_config(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + return PCI_CAPABILITY_LIST; +} + + +static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap) +{ + int pos = pseries_eeh_cap_start(pdn); + int cnt = 48; /* Maximal number of capabilities */ + u32 id; + + if (!pos) + return 0; + + while (cnt--) { + rtas_read_config(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + pos &= ~3; + rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + +static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 header; + int pos = 256; + int ttl = (4096 - 256) / 8; + + if (!edev || !edev->pcie_cap) + return 0; + if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + return 0; + else if (!header) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap && pos) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 256) + break; + + if (rtas_read_config(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + +/** + * pseries_eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct eeh_dev *parent; + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; + while (pdn) { + /* We're poking out of PCI territory */ + parent = pdn_to_eeh_dev(pdn); + if (!parent) + return NULL; + + if (parent->pe) + return parent->pe; + + pdn = pdn->parent; + } + + return NULL; +} + +/** + * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn + * + * @pdn: PCI device node + * + * When we discover a new PCI device via the device-tree we create a + * corresponding pci_dn and we allocate, but don't initialise, an eeh_dev. + * This function takes care of the initialisation and inserts the eeh_dev + * into the correct eeh_pe. If no eeh_pe exists we'll allocate one. + */ +static void pseries_eeh_init_edev(struct pci_dn *pdn) +{ + struct eeh_pe pe, *parent; + struct eeh_dev *edev; + u32 pcie_flags; + int ret; + + if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))) + return; + + /* + * Find the eeh_dev for this pdn. The storage for the eeh_dev was + * allocated at the same time as the pci_dn. + * + * XXX: We should probably re-visit that. + */ + edev = pdn_to_eeh_dev(pdn); + if (!edev) + return; + + /* + * If ->pe is set then we've already probed this device. We hit + * this path when a pci_dev is removed and rescanned while recovering + * a PE (i.e. for devices where the driver doesn't support error + * recovery). + */ + if (edev->pe) + return; + + /* Check class/vendor/device IDs */ + if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code) + return; + + /* Skip for PCI-ISA bridge */ + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + return; + + eeh_edev_dbg(edev, "Probing device\n"); + + /* + * Update class code and mode of eeh device. We need + * correctly reflects that current device is root port + * or PCIe switch downstream port. + */ + edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); + edev->mode &= 0xFFFFFF00; + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + edev->mode |= EEH_DEV_BRIDGE; + if (edev->pcie_cap) { + rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + } + + /* first up, find the pe_config_addr for the PE containing the device */ + ret = pseries_eeh_get_pe_config_addr(pdn); + if (ret < 0) { + eeh_edev_dbg(edev, "Unable to find pe_config_addr\n"); + goto err; + } + + /* Try enable EEH on the fake PE */ + memset(&pe, 0, sizeof(struct eeh_pe)); + pe.phb = pdn->phb; + pe.addr = ret; + + eeh_edev_dbg(edev, "Enabling EEH on device\n"); + ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); + if (ret) { + eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret); + goto err; + } + + edev->pe_config_addr = pe.addr; + + eeh_add_flag(EEH_ENABLED); + + parent = pseries_eeh_pe_get_parent(edev); + eeh_pe_tree_insert(edev, parent); + eeh_save_bars(edev); + eeh_edev_dbg(edev, "EEH enabled for device"); + + return; + +err: + eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret); +} + +static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + + pdn = pci_get_pdn_by_devfn(pdev->bus, pdev->devfn); + if (!pdn) + return NULL; + + /* + * If the system supports EEH on this device then the eeh_dev was + * configured and inserted into a PE in pseries_eeh_init_edev() + */ + edev = pdn_to_eeh_dev(pdn); + if (!edev || !edev->pe) + return NULL; + + return edev; +} + +/** + * pseries_eeh_init_edev_recursive - Enable EEH for the indicated device + * @pdn: PCI device node + * + * This routine must be used to perform EEH initialization for the + * indicated PCI device that was added after system boot (e.g. + * hotplug, dlpar). + */ +void pseries_eeh_init_edev_recursive(struct pci_dn *pdn) +{ + struct pci_dn *n; + + if (!pdn) + return; + + list_for_each_entry(n, &pdn->child_list, list) + pseries_eeh_init_edev_recursive(n); + + pseries_eeh_init_edev(pdn); +} +EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive); + +/** + * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @pe: EEH PE + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. + * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int pseries_eeh_set_option(struct eeh_pe *pe, int option) +{ + int ret = 0; + + /* + * When we're enabling or disabling EEH functionality on + * the particular PE, the PE config address is possibly + * unavailable. Therefore, we have to figure it out from + * the FDT node. + */ + switch (option) { + case EEH_OPT_DISABLE: + case EEH_OPT_ENABLE: + case EEH_OPT_THAW_MMIO: + case EEH_OPT_THAW_DMA: + break; + case EEH_OPT_FREEZE_PE: + /* Not support */ + return 0; + default: + pr_err("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } + + ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, + pe->addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); + + return ret; +} + +/** + * pseries_eeh_get_state - Retrieve PE state + * @pe: EEH PE + * @delay: suggested time to wait if state is unavailable + * + * Retrieve the state of the specified PE. On RTAS compliant + * pseries platform, there already has one dedicated RTAS function + * for the purpose. It's notable that the associated PE config address + * might be ready when calling the function. Therefore, endeavour to + * use the PE config address if possible. Further more, there're 2 + * RTAS calls for the purpose, we need to try the new one and back + * to the old one if the new one couldn't work properly. + */ +static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + int ret; + int rets[4]; + int result; + + if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { + ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, + pe->addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { + /* Fake PE unavailable info */ + rets[2] = 0; + ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, + pe->addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); + } else { + return EEH_STATE_NOT_SUPPORT; + } + + if (ret) + return ret; + + /* Parse the result out */ + if (!rets[1]) + return EEH_STATE_NOT_SUPPORT; + + switch(rets[0]) { + case 0: + result = EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE; + break; + case 1: + result = EEH_STATE_RESET_ACTIVE | + EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE; + break; + case 2: + result = 0; + break; + case 4: + result = EEH_STATE_MMIO_ENABLED; + break; + case 5: + if (rets[2]) { + if (delay) + *delay = rets[2]; + result = EEH_STATE_UNAVAILABLE; + } else { + result = EEH_STATE_NOT_SUPPORT; + } + break; + default: + result = EEH_STATE_NOT_SUPPORT; + } + + return result; +} + +/** + * pseries_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Reset the specified PE + */ +static int pseries_eeh_reset(struct eeh_pe *pe, int option) +{ + return pseries_eeh_phb_reset(pe->phb, pe->addr, option); +} + +/** + * pseries_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + * Actually, the error will be retrieved through the dedicated + * RTAS call. + */ +static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&slot_errbuf_lock, flags); + memset(slot_errbuf, 0, eeh_error_buf_size); + + ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr, + BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), + virt_to_phys(drv_log), len, + virt_to_phys(slot_errbuf), eeh_error_buf_size, + severity); + if (!ret) + log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); + spin_unlock_irqrestore(&slot_errbuf_lock, flags); + + return ret; +} + +/** + * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @pe: EEH PE + * + */ +static int pseries_eeh_configure_bridge(struct eeh_pe *pe) +{ + return pseries_eeh_phb_configure_bridge(pe->phb, pe->addr); +} + +/** + * pseries_eeh_read_config - Read PCI config space + * @edev: EEH device handle + * @where: PCI config space offset + * @size: size to read + * @val: return value + * + * Read config space from the speicifed device + */ +static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val) +{ + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + return rtas_read_config(pdn, where, size, val); +} + +/** + * pseries_eeh_write_config - Write PCI config space + * @edev: EEH device handle + * @where: PCI config space offset + * @size: size to write + * @val: value to be written + * + * Write config space to the specified device + */ +static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val) +{ + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + return rtas_write_config(pdn, where, size, val); +} + +#ifdef CONFIG_PCI_IOV +static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs) +{ + int rc; + int ibm_allow_unfreeze = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE); + unsigned long buid, addr; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL, + addr, + BUID_HI(buid), + BUID_LO(buid), + rtas_data_buf, cur_vfs * sizeof(u16)); + spin_unlock(&rtas_data_buf_lock); + if (rc) + pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n", + __func__, + pdn->phb->global_number, addr, rc); + return rc; +} + +static int pseries_call_allow_unfreeze(struct eeh_dev *edev) +{ + int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num; + struct pci_dn *pdn, *tmp, *parent, *physfn_pdn; + u16 *vf_pe_array; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) { + if (edev->pdev->is_physfn) { + cur_vfs = pci_num_vf(edev->pdev); + pdn = eeh_dev_to_pdn(edev); + parent = pdn->parent; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) + vf_pe_array[vf_index] = + cpu_to_be16(pdn->pe_num_map[vf_index]); + rc = pseries_send_allow_unfreeze(pdn, vf_pe_array, + cur_vfs); + pdn->last_allow_rc = rc; + for (vf_index = 0; vf_index < cur_vfs; vf_index++) { + list_for_each_entry_safe(pdn, tmp, + &parent->child_list, + list) { + bus = pci_iov_virtfn_bus(edev->pdev, + vf_index); + devfn = pci_iov_virtfn_devfn(edev->pdev, + vf_index); + if (pdn->busno != bus || + pdn->devfn != devfn) + continue; + pdn->last_allow_rc = rc; + } + } + } else { + pdn = pci_get_pdn(edev->pdev); + physfn_pdn = pci_get_pdn(edev->physfn); + + vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index]; + vf_pe_array[0] = cpu_to_be16(vf_pe_num); + rc = pseries_send_allow_unfreeze(physfn_pdn, + vf_pe_array, 1); + pdn->last_allow_rc = rc; + } + } + + kfree(vf_pe_array); + return rc; +} + +static int pseries_notify_resume(struct eeh_dev *edev) +{ + if (!edev) + return -EEXIST; + + if (rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE) == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + if (edev->pdev->is_physfn || edev->pdev->is_virtfn) + return pseries_call_allow_unfreeze(edev); + + return 0; +} +#endif + +static struct eeh_ops pseries_eeh_ops = { + .name = "pseries", + .probe = pseries_eeh_probe, + .set_option = pseries_eeh_set_option, + .get_state = pseries_eeh_get_state, + .reset = pseries_eeh_reset, + .get_log = pseries_eeh_get_log, + .configure_bridge = pseries_eeh_configure_bridge, + .err_inject = NULL, + .read_config = pseries_eeh_read_config, + .write_config = pseries_eeh_write_config, + .next_error = NULL, + .restore_config = NULL, /* NB: configure_bridge() does this */ +#ifdef CONFIG_PCI_IOV + .notify_resume = pseries_notify_resume +#endif +}; + +/** + * eeh_pseries_init - Register platform dependent EEH operations + * + * EEH initialization on pseries platform. This function should be + * called before any EEH related functions. + */ +static int __init eeh_pseries_init(void) +{ + struct pci_controller *phb; + struct pci_dn *pdn; + int ret, config_addr; + + /* figure out EEH RTAS function call tokens */ + ibm_set_eeh_option = rtas_function_token(RTAS_FN_IBM_SET_EEH_OPTION); + ibm_set_slot_reset = rtas_function_token(RTAS_FN_IBM_SET_SLOT_RESET); + ibm_read_slot_reset_state2 = rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE2); + ibm_read_slot_reset_state = rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE); + ibm_slot_error_detail = rtas_function_token(RTAS_FN_IBM_SLOT_ERROR_DETAIL); + ibm_get_config_addr_info2 = rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2); + ibm_get_config_addr_info = rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO); + ibm_configure_pe = rtas_function_token(RTAS_FN_IBM_CONFIGURE_PE); + + /* + * ibm,configure-pe and ibm,configure-bridge have the same semantics, + * however ibm,configure-pe can be faster. If we can't find + * ibm,configure-pe then fall back to using ibm,configure-bridge. + */ + if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE) + ibm_configure_pe = rtas_function_token(RTAS_FN_IBM_CONFIGURE_BRIDGE); + + /* + * Necessary sanity check. We needn't check "get-config-addr-info" + * and its variant since the old firmware probably support address + * of domain/bus/slot/function for EEH RTAS operations. + */ + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE || + ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE || + (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) || + ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE || + ibm_configure_pe == RTAS_UNKNOWN_SERVICE) { + pr_info("EEH functionality not supported\n"); + return -EINVAL; + } + + /* Initialize error log size */ + eeh_error_buf_size = rtas_get_error_log_max(); + + /* Set EEH probe mode */ + eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG); + + /* Set EEH machine dependent code */ + ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device; + + if (is_kdump_kernel() || reset_devices) { + pr_info("Issue PHB reset ...\n"); + list_for_each_entry(phb, &hose_list, list_node) { + // Skip if the slot is empty + if (list_empty(&PCI_DN(phb->dn)->child_list)) + continue; + + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); + config_addr = pseries_eeh_get_pe_config_addr(pdn); + + /* invalid PE config addr */ + if (config_addr < 0) + continue; + + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL); + pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE); + pseries_eeh_phb_configure_bridge(phb, config_addr); + } + } + + ret = eeh_init(&pseries_eeh_ops); + if (!ret) + pr_info("EEH: pSeries platform initialized\n"); + else + pr_info("EEH: pSeries platform initialization failure (%d)\n", + ret); + return ret; +} +machine_arch_initcall(pseries, eeh_pseries_init); diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c new file mode 100644 index 0000000000..623dfe0d8e --- /dev/null +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2001 Dave Engebretsen IBM Corporation + */ + +#include +#include + +#include "pseries.h" + +void __init request_event_sources_irqs(struct device_node *np, + irq_handler_t handler, + const char *name) +{ + int i, virq, rc; + + for (i = 0; i < 16; i++) { + virq = of_irq_get(np, i); + if (virq < 0) + return; + if (WARN(!virq, "event-sources: Unable to allocate " + "interrupt number for %pOF\n", np)) + continue; + + rc = request_irq(virq, handler, 0, name, NULL); + if (WARN(rc, "event-sources: Unable to request interrupt %d for %pOF\n", + virq, np)) + return; + } +} diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c new file mode 100644 index 0000000000..18447e5fa1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * pSeries firmware setup code. + * + * Portions from arch/powerpc/platforms/pseries/setup.c: + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * + * Portions from arch/powerpc/kernel/firmware.c + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * Copyright (C) 2005 Stephen Rothwell, IBM Corporation + * + * Copyright 2006 IBM Corporation. + */ + + +#include +#include +#include +#include +#include + +#include "pseries.h" + +struct hypertas_fw_feature { + unsigned long val; + char * name; +}; + +/* + * The names in this table match names in rtas/ibm,hypertas-functions. If the + * entry ends in a '*', only upto the '*' is matched. Otherwise the entire + * string must match. + */ +static __initdata struct hypertas_fw_feature +hypertas_fw_features_table[] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK_REMOVE, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE, + "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, + {FW_FEATURE_VPHN, "hcall-vphn"}, + {FW_FEATURE_SET_MODE, "hcall-set-mode"}, + {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"}, + {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"}, + {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"}, + {FW_FEATURE_PAPR_SCM, "hcall-scm"}, + {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"}, + {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"}, + {FW_FEATURE_WATCHDOG, "hcall-watchdog"}, + {FW_FEATURE_PLPKS, "hcall-pks"}, +}; + +/* Build up the firmware features bitmask using the contents of + * device-tree/ibm,hypertas-functions. Ultimately this functionality may + * be moved into prom.c prom_init(). + */ +static void __init fw_hypertas_feature_init(const char *hypertas, + unsigned long len) +{ + const char *s; + int i; + + pr_debug(" -> fw_hypertas_feature_init()\n"); + + for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) { + for (i = 0; i < ARRAY_SIZE(hypertas_fw_features_table); i++) { + const char *name = hypertas_fw_features_table[i].name; + size_t size; + + /* + * If there is a '*' at the end of name, only check + * upto there + */ + size = strlen(name); + if (size && name[size - 1] == '*') { + if (strncmp(name, s, size - 1)) + continue; + } else if (strcmp(name, s)) + continue; + + /* we have a match */ + powerpc_firmware_features |= + hypertas_fw_features_table[i].val; + break; + } + } + + if (is_secure_guest() && + (powerpc_firmware_features & FW_FEATURE_PUT_TCE_IND)) { + powerpc_firmware_features &= ~FW_FEATURE_PUT_TCE_IND; + pr_debug("SVM: disabling PUT_TCE_IND firmware feature\n"); + } + + pr_debug(" <- fw_hypertas_feature_init()\n"); +} + +struct vec5_fw_feature { + unsigned long val; + unsigned int feature; +}; + +static __initdata struct vec5_fw_feature +vec5_fw_features_table[] = { + {FW_FEATURE_FORM1_AFFINITY, OV5_FORM1_AFFINITY}, + {FW_FEATURE_PRRN, OV5_PRRN}, + {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, + {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, + {FW_FEATURE_FORM2_AFFINITY, OV5_FORM2_AFFINITY}, +}; + +static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) +{ + unsigned int index, feat; + int i; + + pr_debug(" -> fw_vec5_feature_init()\n"); + + for (i = 0; i < ARRAY_SIZE(vec5_fw_features_table); i++) { + index = OV5_INDX(vec5_fw_features_table[i].feature); + feat = OV5_FEAT(vec5_fw_features_table[i].feature); + + if (index < len && (vec5[index] & feat)) + powerpc_firmware_features |= + vec5_fw_features_table[i].val; + } + + pr_debug(" <- fw_vec5_feature_init()\n"); +} + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init probe_fw_features(unsigned long node, const char *uname, int + depth, void *data) +{ + const char *prop; + int len; + static int hypertas_found; + static int vec5_found; + + if (depth != 1) + return 0; + + if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) { + prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions", + &len); + if (prop) { + powerpc_firmware_features |= FW_FEATURE_LPAR; + fw_hypertas_feature_init(prop, len); + } + + hypertas_found = 1; + } + + if (!strcmp(uname, "chosen")) { + prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5", + &len); + if (prop) + fw_vec5_feature_init(prop, len); + + vec5_found = 1; + } + + return hypertas_found && vec5_found; +} + +void __init pseries_probe_fw_features(void) +{ + of_scan_flat_dt(probe_fw_features, NULL); +} diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c new file mode 100644 index 0000000000..e62835a12d --- /dev/null +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -0,0 +1,901 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * pseries CPU Hotplug infrastructure. + * + * Split out from arch/powerpc/platforms/pseries/setup.c + * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c + * + * Peter Bergner, IBM March 2001. + * Copyright (C) 2001 IBM. + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * Plus various changes from other IBM teams... + * + * Copyright (C) 2006 Michael Ellerman, IBM Corporation + */ + +#define pr_fmt(fmt) "pseries-hotplug-cpu: " fmt + +#include +#include +#include +#include /* for idle_task_exit */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +/* This version can't take the spinlock, because it never returns */ +static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; + +/* + * Record the CPU ids used on each nodes. + * Protected by cpu_add_remove_lock. + */ +static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES]; + +static void rtas_stop_self(void) +{ + static struct rtas_args args; + + local_irq_disable(); + + BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); + + rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); + + panic("Alas, I survived.\n"); +} + +static void pseries_cpu_offline_self(void) +{ + unsigned int hwcpu = hard_smp_processor_id(); + + local_irq_disable(); + idle_task_exit(); + if (xive_enabled()) + xive_teardown_cpu(); + else + xics_teardown_cpu(); + + unregister_slb_shadow(hwcpu); + unregister_vpa(hwcpu); + rtas_stop_self(); + + /* Should never get here... */ + BUG(); + for(;;); +} + +static int pseries_cpu_disable(void) +{ + int cpu = smp_processor_id(); + + set_cpu_online(cpu, false); + vdso_data->processorCount--; + + /*fix boot_cpuid here*/ + if (cpu == boot_cpuid) + boot_cpuid = cpumask_any(cpu_online_mask); + + /* FIXME: abstract this to not be platform specific later on */ + if (xive_enabled()) + xive_smp_disable_cpu(); + else + xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + + return 0; +} + +/* + * pseries_cpu_die: Wait for the cpu to die. + * @cpu: logical processor id of the CPU whose death we're awaiting. + * + * This function is called from the context of the thread which is performing + * the cpu-offline. Here we wait for long enough to allow the cpu in question + * to self-destroy so that the cpu-offline thread can send the CPU_DEAD + * notifications. + * + * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to + * self-destruct. + */ +static void pseries_cpu_die(unsigned int cpu) +{ + int cpu_status = 1; + unsigned int pcpu = get_hard_smp_processor_id(cpu); + unsigned long timeout = jiffies + msecs_to_jiffies(120000); + + while (true) { + cpu_status = smp_query_cpu_stopped(pcpu); + if (cpu_status == QCSS_STOPPED || + cpu_status == QCSS_HARDWARE_ERROR) + break; + + if (time_after(jiffies, timeout)) { + pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n", + cpu, pcpu); + timeout = jiffies + msecs_to_jiffies(120000); + } + + cond_resched(); + } + + if (cpu_status == QCSS_HARDWARE_ERROR) { + pr_warn("CPU %i (hwid %i) reported error while dying\n", + cpu, pcpu); + } + + paca_ptrs[cpu]->cpu_start = 0; +} + +/** + * find_cpu_id_range - found a linear ranger of @nthreads free CPU ids. + * @nthreads : the number of threads (cpu ids) + * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any + * node can be peek. + * @cpu_mask: the returned CPU mask. + * + * Returns 0 on success. + */ +static int find_cpu_id_range(unsigned int nthreads, int assigned_node, + cpumask_var_t *cpu_mask) +{ + cpumask_var_t candidate_mask; + unsigned int cpu, node; + int rc = -ENOSPC; + + if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(*cpu_mask); + for (cpu = 0; cpu < nthreads; cpu++) + cpumask_set_cpu(cpu, *cpu_mask); + + BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask)); + + /* Get a bitmap of unoccupied slots. */ + cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask); + + if (assigned_node != NUMA_NO_NODE) { + /* + * Remove free ids previously assigned on the other nodes. We + * can walk only online nodes because once a node became online + * it is not turned offlined back. + */ + for_each_online_node(node) { + if (node == assigned_node) + continue; + cpumask_andnot(candidate_mask, candidate_mask, + node_recorded_ids_map[node]); + } + } + + if (cpumask_empty(candidate_mask)) + goto out; + + while (!cpumask_empty(*cpu_mask)) { + if (cpumask_subset(*cpu_mask, candidate_mask)) + /* Found a range where we can insert the new cpu(s) */ + break; + cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads); + } + + if (!cpumask_empty(*cpu_mask)) + rc = 0; + +out: + free_cpumask_var(candidate_mask); + return rc; +} + +/* + * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle + * here is that a cpu device node may represent multiple logical cpus + * in the SMT case. We must honor the assumption in other code that + * the logical ids for sibling SMT threads x and y are adjacent, such + * that x^1 == y and y^1 == x. + */ +static int pseries_add_processor(struct device_node *np) +{ + int len, nthreads, node, cpu, assigned_node; + int rc = 0; + cpumask_var_t cpu_mask; + const __be32 *intserv; + + intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return 0; + + nthreads = len / sizeof(u32); + + if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + + /* + * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA + * node id the added CPU belongs to. + */ + node = of_node_to_nid(np); + if (node < 0 || !node_possible(node)) + node = first_online_node; + + BUG_ON(node == NUMA_NO_NODE); + assigned_node = node; + + cpu_maps_update_begin(); + + rc = find_cpu_id_range(nthreads, node, &cpu_mask); + if (rc && nr_node_ids > 1) { + /* + * Try again, considering the free CPU ids from the other node. + */ + node = NUMA_NO_NODE; + rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask); + } + + if (rc) { + pr_err("Cannot add cpu %pOF; this system configuration" + " supports %d logical cpus.\n", np, num_possible_cpus()); + goto out; + } + + for_each_cpu(cpu, cpu_mask) { + BUG_ON(cpu_present(cpu)); + set_cpu_present(cpu, true); + set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++)); + } + + /* Record the newly used CPU ids for the associate node. */ + cpumask_or(node_recorded_ids_map[assigned_node], + node_recorded_ids_map[assigned_node], cpu_mask); + + /* + * If node is set to NUMA_NO_NODE, CPU ids have be reused from + * another node, remove them from its mask. + */ + if (node == NUMA_NO_NODE) { + cpu = cpumask_first(cpu_mask); + pr_warn("Reusing free CPU ids %d-%d from another node\n", + cpu, cpu + nthreads - 1); + for_each_online_node(node) { + if (node == assigned_node) + continue; + cpumask_andnot(node_recorded_ids_map[node], + node_recorded_ids_map[node], + cpu_mask); + } + } + +out: + cpu_maps_update_done(); + free_cpumask_var(cpu_mask); + return rc; +} + +/* + * Update the present map for a cpu node which is going away, and set + * the hard id in the paca(s) to -1 to be consistent with boot time + * convention for non-present cpus. + */ +static void pseries_remove_processor(struct device_node *np) +{ + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + BUG_ON(cpu_online(cpu)); + set_cpu_present(cpu, false); + set_hard_smp_processor_id(cpu, -1); + update_numa_cpu_lookup_table(cpu, -1); + break; + } + if (cpu >= nr_cpu_ids) + printk(KERN_WARNING "Could not find cpu to remove " + "with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); +} + +static int dlpar_offline_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + + if (!cpu_online(cpu)) + break; + + /* + * device_offline() will return -EBUSY (via cpu_down()) if there + * is only one CPU left. Check it here to fail earlier and with a + * more informative error message, while also retaining the + * cpu_add_remove_lock to be sure that no CPUs are being + * online/offlined during this check. + */ + if (num_online_cpus() == 1) { + pr_warn("Unable to remove last online CPU %pOFn\n", dn); + rc = -EBUSY; + goto out_unlock; + } + + cpu_maps_update_done(); + rc = device_offline(get_cpu_device(cpu)); + if (rc) + goto out; + cpu_maps_update_begin(); + break; + } + if (cpu == num_possible_cpus()) { + pr_warn("Could not find cpu to offline with physical id 0x%x\n", + thread); + } + } +out_unlock: + cpu_maps_update_done(); + +out: + return rc; +} + +static int dlpar_online_cpu(struct device_node *dn) +{ + int rc = 0; + unsigned int cpu; + int len, nthreads, i; + const __be32 *intserv; + u32 thread; + + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len); + if (!intserv) + return -EINVAL; + + nthreads = len / sizeof(u32); + + cpu_maps_update_begin(); + for (i = 0; i < nthreads; i++) { + thread = be32_to_cpu(intserv[i]); + for_each_present_cpu(cpu) { + if (get_hard_smp_processor_id(cpu) != thread) + continue; + + if (!topology_is_primary_thread(cpu)) { + if (cpu_smt_control != CPU_SMT_ENABLED) + break; + if (!topology_smt_thread_allowed(cpu)) + break; + } + + cpu_maps_update_done(); + find_and_update_cpu_nid(cpu); + rc = device_online(get_cpu_device(cpu)); + if (rc) { + dlpar_offline_cpu(dn); + goto out; + } + cpu_maps_update_begin(); + + break; + } + if (cpu == num_possible_cpus()) + printk(KERN_WARNING "Could not find cpu to online " + "with physical id 0x%x\n", thread); + } + cpu_maps_update_done(); + +out: + return rc; + +} + +static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index) +{ + struct device_node *child = NULL; + u32 my_drc_index; + bool found; + int rc; + + /* Assume cpu doesn't exist */ + found = false; + + for_each_child_of_node(parent, child) { + rc = of_property_read_u32(child, "ibm,my-drc-index", + &my_drc_index); + if (rc) + continue; + + if (my_drc_index == drc_index) { + of_node_put(child); + found = true; + break; + } + } + + return found; +} + +static bool drc_info_valid_index(struct device_node *parent, u32 drc_index) +{ + struct property *info; + struct of_drc_info drc; + const __be32 *value; + u32 index; + int count, i, j; + + info = of_find_property(parent, "ibm,drc-info", NULL); + if (!info) + return false; + + value = of_prop_next_u32(info, NULL, &count); + + /* First value of ibm,drc-info is number of drc-info records */ + if (value) + value++; + else + return false; + + for (i = 0; i < count; i++) { + if (of_read_drc_info_cell(&info, &value, &drc)) + return false; + + if (strncmp(drc.drc_type, "CPU", 3)) + break; + + if (drc_index > drc.last_drc_index) + continue; + + index = drc.drc_index_start; + for (j = 0; j < drc.num_sequential_elems; j++) { + if (drc_index == index) + return true; + + index += drc.sequential_inc; + } + } + + return false; +} + +static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index) +{ + bool found = false; + int rc, index; + + if (of_property_present(parent, "ibm,drc-info")) + return drc_info_valid_index(parent, drc_index); + + /* Note that the format of the ibm,drc-indexes array is + * the number of entries in the array followed by the array + * of drc values so we start looking at index = 1. + */ + index = 1; + while (!found) { + u32 drc; + + rc = of_property_read_u32_index(parent, "ibm,drc-indexes", + index++, &drc); + + if (rc) + break; + + if (drc == drc_index) + found = true; + } + + return found; +} + +static int pseries_cpuhp_attach_nodes(struct device_node *dn) +{ + struct of_changeset cs; + int ret; + + /* + * This device node is unattached but may have siblings; open-code the + * traversal. + */ + for (of_changeset_init(&cs); dn != NULL; dn = dn->sibling) { + ret = of_changeset_attach_node(&cs, dn); + if (ret) + goto out; + } + + ret = of_changeset_apply(&cs); +out: + of_changeset_destroy(&cs); + return ret; +} + +static ssize_t dlpar_cpu_add(u32 drc_index) +{ + struct device_node *dn, *parent; + int rc, saved_rc; + + pr_debug("Attempting to add CPU, drc index: %x\n", drc_index); + + parent = of_find_node_by_path("/cpus"); + if (!parent) { + pr_warn("Failed to find CPU root node \"/cpus\"\n"); + return -ENODEV; + } + + if (dlpar_cpu_exists(parent, drc_index)) { + of_node_put(parent); + pr_warn("CPU with drc index %x already exists\n", drc_index); + return -EINVAL; + } + + if (!valid_cpu_drc_index(parent, drc_index)) { + of_node_put(parent); + pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index); + return -EINVAL; + } + + rc = dlpar_acquire_drc(drc_index); + if (rc) { + pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n", + rc, drc_index); + of_node_put(parent); + return -EINVAL; + } + + dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); + if (!dn) { + pr_warn("Failed call to configure-connector, drc index: %x\n", + drc_index); + dlpar_release_drc(drc_index); + of_node_put(parent); + return -EINVAL; + } + + rc = pseries_cpuhp_attach_nodes(dn); + + /* Regardless we are done with parent now */ + of_node_put(parent); + + if (rc) { + saved_rc = rc; + pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n", + dn, rc, drc_index); + + rc = dlpar_release_drc(drc_index); + if (!rc) + dlpar_free_cc_nodes(dn); + + return saved_rc; + } + + update_numa_distance(dn); + + rc = dlpar_online_cpu(dn); + if (rc) { + saved_rc = rc; + pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n", + dn, rc, drc_index); + + rc = dlpar_detach_node(dn); + if (!rc) + dlpar_release_drc(drc_index); + + return saved_rc; + } + + pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn, + drc_index); + return rc; +} + +static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn) +{ + unsigned int use_count = 0; + struct device_node *dn, *tn; + + WARN_ON(!of_node_is_type(cachedn, "cache")); + + for_each_of_cpu_node(dn) { + tn = of_find_next_cache_node(dn); + of_node_put(tn); + if (tn == cachedn) + use_count++; + } + + for_each_node_by_type(dn, "cache") { + tn = of_find_next_cache_node(dn); + of_node_put(tn); + if (tn == cachedn) + use_count++; + } + + return use_count; +} + +static int pseries_cpuhp_detach_nodes(struct device_node *cpudn) +{ + struct device_node *dn; + struct of_changeset cs; + int ret = 0; + + of_changeset_init(&cs); + ret = of_changeset_detach_node(&cs, cpudn); + if (ret) + goto out; + + dn = cpudn; + while ((dn = of_find_next_cache_node(dn))) { + if (pseries_cpuhp_cache_use_count(dn) > 1) { + of_node_put(dn); + break; + } + + ret = of_changeset_detach_node(&cs, dn); + of_node_put(dn); + if (ret) + goto out; + } + + ret = of_changeset_apply(&cs); +out: + of_changeset_destroy(&cs); + return ret; +} + +static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) +{ + int rc; + + pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n", + dn, drc_index); + + rc = dlpar_offline_cpu(dn); + if (rc) { + pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc); + return -EINVAL; + } + + rc = dlpar_release_drc(drc_index); + if (rc) { + pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n", + drc_index, dn, rc); + dlpar_online_cpu(dn); + return rc; + } + + rc = pseries_cpuhp_detach_nodes(dn); + if (rc) { + int saved_rc = rc; + + pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc); + + rc = dlpar_acquire_drc(drc_index); + if (!rc) + dlpar_online_cpu(dn); + + return saved_rc; + } + + pr_debug("Successfully removed CPU, drc index: %x\n", drc_index); + return 0; +} + +static struct device_node *cpu_drc_index_to_dn(u32 drc_index) +{ + struct device_node *dn; + u32 my_index; + int rc; + + for_each_node_by_type(dn, "cpu") { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index); + if (rc) + continue; + + if (my_index == drc_index) + break; + } + + return dn; +} + +static int dlpar_cpu_remove_by_index(u32 drc_index) +{ + struct device_node *dn; + int rc; + + dn = cpu_drc_index_to_dn(drc_index); + if (!dn) { + pr_warn("Cannot find CPU (drc index %x) to remove\n", + drc_index); + return -ENODEV; + } + + rc = dlpar_cpu_remove(dn, drc_index); + of_node_put(dn); + return rc; +} + +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) +{ + u32 drc_index; + int rc; + + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + switch (hp_elog->action) { + case PSERIES_HP_ELOG_ACTION_REMOVE: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) { + rc = dlpar_cpu_remove_by_index(drc_index); + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the CPU removal failed. + */ + if (rc) + dlpar_unisolate_drc(drc_index); + } + else + rc = -EINVAL; + break; + case PSERIES_HP_ELOG_ACTION_ADD: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_cpu_add(drc_index); + else + rc = -EINVAL; + break; + default: + pr_err("Invalid action (%d) specified\n", hp_elog->action); + rc = -EINVAL; + break; + } + + unlock_device_hotplug(); + return rc; +} + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + +static ssize_t dlpar_cpu_probe(const char *buf, size_t count) +{ + u32 drc_index; + int rc; + + rc = kstrtou32(buf, 0, &drc_index); + if (rc) + return -EINVAL; + + rc = dlpar_cpu_add(drc_index); + + return rc ? rc : count; +} + +static ssize_t dlpar_cpu_release(const char *buf, size_t count) +{ + struct device_node *dn; + u32 drc_index; + int rc; + + dn = of_find_node_by_path(buf); + if (!dn) + return -EINVAL; + + rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index); + if (rc) { + of_node_put(dn); + return -EINVAL; + } + + rc = dlpar_cpu_remove(dn, drc_index); + of_node_put(dn); + + return rc ? rc : count; +} + +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + +static int pseries_smp_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + int err = 0; + + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + err = pseries_add_processor(rd->dn); + break; + case OF_RECONFIG_DETACH_NODE: + pseries_remove_processor(rd->dn); + break; + } + return notifier_from_errno(err); +} + +static struct notifier_block pseries_smp_nb = { + .notifier_call = pseries_smp_notifier, +}; + +void __init pseries_cpu_hotplug_init(void) +{ + int qcss_tok; + + rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF); + qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE); + + if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE || + qcss_tok == RTAS_UNKNOWN_SERVICE) { + printk(KERN_INFO "CPU Hotplug not supported by firmware " + "- disabling.\n"); + return; + } + + smp_ops->cpu_offline_self = pseries_cpu_offline_self; + smp_ops->cpu_disable = pseries_cpu_disable; + smp_ops->cpu_die = pseries_cpu_die; +} + +static int __init pseries_dlpar_init(void) +{ + unsigned int node; + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + ppc_md.cpu_probe = dlpar_cpu_probe; + ppc_md.cpu_release = dlpar_cpu_release; +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + + /* Processors can be added/removed only on LPAR */ + if (firmware_has_feature(FW_FEATURE_LPAR)) { + for_each_node(node) { + if (!alloc_cpumask_var_node(&node_recorded_ids_map[node], + GFP_KERNEL, node)) + return -ENOMEM; + + /* Record ids of CPU added at boot time */ + cpumask_copy(node_recorded_ids_map[node], + cpumask_of_node(node)); + } + + of_reconfig_notifier_register(&pseries_smp_nb); + } + + return 0; +} +machine_arch_initcall(pseries, pseries_dlpar_init); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c new file mode 100644 index 0000000000..4adca5b61d --- /dev/null +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -0,0 +1,923 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * pseries Memory Hotplug infrastructure. + * + * Copyright (C) 2008 Badari Pulavarty, IBM Corporation + */ + +#define pr_fmt(fmt) "pseries-hotplug-mem: " fmt + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "pseries.h" + +static void dlpar_free_property(struct property *prop) +{ + kfree(prop->name); + kfree(prop->value); + kfree(prop); +} + +static struct property *dlpar_clone_property(struct property *prop, + u32 prop_size) +{ + struct property *new_prop; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop->name = kstrdup(prop->name, GFP_KERNEL); + new_prop->value = kzalloc(prop_size, GFP_KERNEL); + if (!new_prop->name || !new_prop->value) { + dlpar_free_property(new_prop); + return NULL; + } + + memcpy(new_prop->value, prop->value, prop->length); + new_prop->length = prop_size; + + of_property_set_flag(new_prop, OF_DYNAMIC); + return new_prop; +} + +static bool find_aa_index(struct device_node *dr_node, + struct property *ala_prop, + const u32 *lmb_assoc, u32 *aa_index) +{ + u32 *assoc_arrays, new_prop_size; + struct property *new_prop; + int aa_arrays, aa_array_entries, aa_array_sz; + int i, index; + + /* + * The ibm,associativity-lookup-arrays property is defined to be + * a 32-bit value specifying the number of associativity arrays + * followed by a 32-bitvalue specifying the number of entries per + * array, followed by the associativity arrays. + */ + assoc_arrays = ala_prop->value; + + aa_arrays = be32_to_cpu(assoc_arrays[0]); + aa_array_entries = be32_to_cpu(assoc_arrays[1]); + aa_array_sz = aa_array_entries * sizeof(u32); + + for (i = 0; i < aa_arrays; i++) { + index = (i * aa_array_entries) + 2; + + if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz)) + continue; + + *aa_index = i; + return true; + } + + new_prop_size = ala_prop->length + aa_array_sz; + new_prop = dlpar_clone_property(ala_prop, new_prop_size); + if (!new_prop) + return false; + + assoc_arrays = new_prop->value; + + /* increment the number of entries in the lookup array */ + assoc_arrays[0] = cpu_to_be32(aa_arrays + 1); + + /* copy the new associativity into the lookup array */ + index = aa_arrays * aa_array_entries + 2; + memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz); + + of_update_property(dr_node, new_prop); + + /* + * The associativity lookup array index for this lmb is + * number of entries - 1 since we added its associativity + * to the end of the lookup array. + */ + *aa_index = be32_to_cpu(assoc_arrays[0]) - 1; + return true; +} + +static int update_lmb_associativity_index(struct drmem_lmb *lmb) +{ + struct device_node *parent, *lmb_node, *dr_node; + struct property *ala_prop; + const u32 *lmb_assoc; + u32 aa_index; + bool found; + + parent = of_find_node_by_path("/"); + if (!parent) + return -ENODEV; + + lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index), + parent); + of_node_put(parent); + if (!lmb_node) + return -EINVAL; + + lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL); + if (!lmb_assoc) { + dlpar_free_cc_nodes(lmb_node); + return -ENODEV; + } + + update_numa_distance(lmb_node); + + dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (!dr_node) { + dlpar_free_cc_nodes(lmb_node); + return -ENODEV; + } + + ala_prop = of_find_property(dr_node, "ibm,associativity-lookup-arrays", + NULL); + if (!ala_prop) { + of_node_put(dr_node); + dlpar_free_cc_nodes(lmb_node); + return -ENODEV; + } + + found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index); + + of_node_put(dr_node); + dlpar_free_cc_nodes(lmb_node); + + if (!found) { + pr_err("Could not find LMB associativity\n"); + return -1; + } + + lmb->aa_index = aa_index; + return 0; +} + +static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb) +{ + unsigned long section_nr; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + + mem_block = find_memory_block(section_nr); + return mem_block; +} + +static int get_lmb_range(u32 drc_index, int n_lmbs, + struct drmem_lmb **start_lmb, + struct drmem_lmb **end_lmb) +{ + struct drmem_lmb *lmb, *start, *end; + struct drmem_lmb *limit; + + start = NULL; + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { + start = lmb; + break; + } + } + + if (!start) + return -EINVAL; + + end = &start[n_lmbs]; + + limit = &drmem_info->lmbs[drmem_info->n_lmbs]; + if (end > limit) + return -EINVAL; + + *start_lmb = start; + *end_lmb = end; + return 0; +} + +static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online) +{ + struct memory_block *mem_block; + int rc; + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) + return -EINVAL; + + if (online && mem_block->dev.offline) + rc = device_online(&mem_block->dev); + else if (!online && !mem_block->dev.offline) + rc = device_offline(&mem_block->dev); + else + rc = 0; + + put_device(&mem_block->dev); + + return rc; +} + +static int dlpar_online_lmb(struct drmem_lmb *lmb) +{ + return dlpar_change_lmb_state(lmb, true); +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +static int dlpar_offline_lmb(struct drmem_lmb *lmb) +{ + return dlpar_change_lmb_state(lmb, false); +} + +static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) +{ + unsigned long start_pfn; + int sections_per_block; + int i; + + start_pfn = base >> PAGE_SHIFT; + + lock_device_hotplug(); + + if (!pfn_valid(start_pfn)) + goto out; + + sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE; + + for (i = 0; i < sections_per_block; i++) { + __remove_memory(base, MIN_MEMORY_BLOCK_SIZE); + base += MIN_MEMORY_BLOCK_SIZE; + } + +out: + /* Update memory regions for memory remove */ + memblock_remove(base, memblock_size); + unlock_device_hotplug(); + return 0; +} + +static int pseries_remove_mem_node(struct device_node *np) +{ + int ret; + struct resource res; + + /* + * Check to see if we are actually removing memory + */ + if (!of_node_is_type(np, "memory")) + return 0; + + /* + * Find the base address and size of the memblock + */ + ret = of_address_to_resource(np, 0, &res); + if (ret) + return ret; + + pseries_remove_memblock(res.start, resource_size(&res)); + return 0; +} + +static bool lmb_is_removable(struct drmem_lmb *lmb) +{ + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) + return false; + +#ifdef CONFIG_FA_DUMP + /* + * Don't hot-remove memory that falls in fadump boot memory area + * and memory that is reserved for capturing old kernel memory. + */ + if (is_fadump_memory_area(lmb->base_addr, memory_block_size_bytes())) + return false; +#endif + /* device_offline() will determine if we can actually remove this lmb */ + return true; +} + +static int dlpar_add_lmb(struct drmem_lmb *); + +static int dlpar_remove_lmb(struct drmem_lmb *lmb) +{ + struct memory_block *mem_block; + int rc; + + if (!lmb_is_removable(lmb)) + return -EINVAL; + + mem_block = lmb_to_memblock(lmb); + if (mem_block == NULL) + return -EINVAL; + + rc = dlpar_offline_lmb(lmb); + if (rc) { + put_device(&mem_block->dev); + return rc; + } + + __remove_memory(lmb->base_addr, memory_block_size); + put_device(&mem_block->dev); + + /* Update memory regions for memory remove */ + memblock_remove(lmb->base_addr, memory_block_size); + + invalidate_lmb_associativity_index(lmb); + lmb->flags &= ~DRCONF_MEM_ASSIGNED; + + return 0; +} + +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) +{ + struct drmem_lmb *lmb; + int lmbs_reserved = 0; + int lmbs_available = 0; + int rc; + + pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove); + + if (lmbs_to_remove == 0) + return -EINVAL; + + /* Validate that there are enough LMBs to satisfy the request */ + for_each_drmem_lmb(lmb) { + if (lmb_is_removable(lmb)) + lmbs_available++; + + if (lmbs_available == lmbs_to_remove) + break; + } + + if (lmbs_available < lmbs_to_remove) { + pr_info("Not enough LMBs available (%d of %d) to satisfy request\n", + lmbs_available, lmbs_to_remove); + return -EINVAL; + } + + for_each_drmem_lmb(lmb) { + rc = dlpar_remove_lmb(lmb); + if (rc) + continue; + + /* Mark this lmb so we can add it later if all of the + * requested LMBs cannot be removed. + */ + drmem_mark_lmb_reserved(lmb); + + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_remove) + break; + } + + if (lmbs_reserved != lmbs_to_remove) { + pr_err("Memory hot-remove failed, adding LMB's back\n"); + + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + rc = dlpar_add_lmb(lmb); + if (rc) + pr_err("Failed to add LMB back, drc index %x\n", + lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; + } + + rc = -EINVAL; + } else { + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + dlpar_release_drc(lmb->drc_index); + pr_info("Memory at %llx was hot-removed\n", + lmb->base_addr); + + drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; + } + rc = 0; + } + + return rc; +} + +static int dlpar_memory_remove_by_index(u32 drc_index) +{ + struct drmem_lmb *lmb; + int lmb_found; + int rc; + + pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index); + + lmb_found = 0; + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_remove_lmb(lmb); + if (!rc) + dlpar_release_drc(lmb->drc_index); + + break; + } + } + + if (!lmb_found) { + pr_debug("Failed to look up LMB for drc index %x\n", drc_index); + rc = -EINVAL; + } else if (rc) { + pr_debug("Failed to hot-remove memory at %llx\n", + lmb->base_addr); + } else { + pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr); + } + + return rc; +} + +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) +{ + struct drmem_lmb *lmb, *start_lmb, *end_lmb; + int rc; + + pr_info("Attempting to hot-remove %u LMB(s) at %x\n", + lmbs_to_remove, drc_index); + + if (lmbs_to_remove == 0) + return -EINVAL; + + rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb); + if (rc) + return -EINVAL; + + /* + * Validate that all LMBs in range are not reserved. Note that it + * is ok if they are !ASSIGNED since our goal here is to remove the + * LMB range, regardless of whether some LMBs were already removed + * by any other reason. + * + * This is a contrast to what is done in remove_by_count() where we + * check for both RESERVED and !ASSIGNED (via lmb_is_removable()), + * because we want to remove a fixed amount of LMBs in that function. + */ + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } + } + + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* + * dlpar_remove_lmb() will error out if the LMB is already + * !ASSIGNED, but this case is a no-op for us. + */ + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + continue; + + rc = dlpar_remove_lmb(lmb); + if (rc) + break; + + drmem_mark_lmb_reserved(lmb); + } + + if (rc) { + pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n"); + + + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the LMB removal failed. + */ + dlpar_unisolate_drc(lmb->drc_index); + + rc = dlpar_add_lmb(lmb); + if (rc) + pr_err("Failed to add LMB, drc index %x\n", + lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); + } + rc = -EINVAL; + } else { + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + dlpar_release_drc(lmb->drc_index); + pr_info("Memory at %llx (drc index %x) was hot-removed\n", + lmb->base_addr, lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); + } + } + + return rc; +} + +#else +static inline int pseries_remove_memblock(unsigned long base, + unsigned long memblock_size) +{ + return -EOPNOTSUPP; +} +static inline int pseries_remove_mem_node(struct device_node *np) +{ + return 0; +} +static int dlpar_remove_lmb(struct drmem_lmb *lmb) +{ + return -EOPNOTSUPP; +} +static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) +{ + return -EOPNOTSUPP; +} +static int dlpar_memory_remove_by_index(u32 drc_index) +{ + return -EOPNOTSUPP; +} + +static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MEMORY_HOTREMOVE */ + +static int dlpar_add_lmb(struct drmem_lmb *lmb) +{ + unsigned long block_sz; + int nid, rc; + + if (lmb->flags & DRCONF_MEM_ASSIGNED) + return -EINVAL; + + rc = update_lmb_associativity_index(lmb); + if (rc) { + dlpar_release_drc(lmb->drc_index); + return rc; + } + + block_sz = memory_block_size_bytes(); + + /* Find the node id for this LMB. Fake one if necessary. */ + nid = of_drconf_to_nid_single(lmb); + if (nid < 0 || !node_possible(nid)) + nid = first_online_node; + + /* Add the memory */ + rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY); + if (rc) { + invalidate_lmb_associativity_index(lmb); + return rc; + } + + rc = dlpar_online_lmb(lmb); + if (rc) { + __remove_memory(lmb->base_addr, block_sz); + invalidate_lmb_associativity_index(lmb); + } else { + lmb->flags |= DRCONF_MEM_ASSIGNED; + } + + return rc; +} + +static int dlpar_memory_add_by_count(u32 lmbs_to_add) +{ + struct drmem_lmb *lmb; + int lmbs_available = 0; + int lmbs_reserved = 0; + int rc; + + pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); + + if (lmbs_to_add == 0) + return -EINVAL; + + /* Validate that there are enough LMBs to satisfy the request */ + for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) + continue; + + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + lmbs_available++; + + if (lmbs_available == lmbs_to_add) + break; + } + + if (lmbs_available < lmbs_to_add) + return -EINVAL; + + for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_ASSIGNED) + continue; + + rc = dlpar_acquire_drc(lmb->drc_index); + if (rc) + continue; + + rc = dlpar_add_lmb(lmb); + if (rc) { + dlpar_release_drc(lmb->drc_index); + continue; + } + + /* Mark this lmb so we can remove it later if all of the + * requested LMBs cannot be added. + */ + drmem_mark_lmb_reserved(lmb); + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_add) + break; + } + + if (lmbs_reserved != lmbs_to_add) { + pr_err("Memory hot-add failed, removing any added LMBs\n"); + + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + rc = dlpar_remove_lmb(lmb); + if (rc) + pr_err("Failed to remove LMB, drc index %x\n", + lmb->drc_index); + else + dlpar_release_drc(lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; + } + rc = -EINVAL; + } else { + for_each_drmem_lmb(lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + pr_debug("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, lmb->drc_index); + drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; + } + rc = 0; + } + + return rc; +} + +static int dlpar_memory_add_by_index(u32 drc_index) +{ + struct drmem_lmb *lmb; + int rc, lmb_found; + + pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index); + + lmb_found = 0; + for_each_drmem_lmb(lmb) { + if (lmb->drc_index == drc_index) { + lmb_found = 1; + rc = dlpar_acquire_drc(lmb->drc_index); + if (!rc) { + rc = dlpar_add_lmb(lmb); + if (rc) + dlpar_release_drc(lmb->drc_index); + } + + break; + } + } + + if (!lmb_found) + rc = -EINVAL; + + if (rc) + pr_info("Failed to hot-add memory, drc index %x\n", drc_index); + else + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, drc_index); + + return rc; +} + +static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) +{ + struct drmem_lmb *lmb, *start_lmb, *end_lmb; + int rc; + + pr_info("Attempting to hot-add %u LMB(s) at index %x\n", + lmbs_to_add, drc_index); + + if (lmbs_to_add == 0) + return -EINVAL; + + rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb); + if (rc) + return -EINVAL; + + /* Validate that the LMBs in this range are not reserved */ + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* Fail immediately if the whole range can't be hot-added */ + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } + } + + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (lmb->flags & DRCONF_MEM_ASSIGNED) + continue; + + rc = dlpar_acquire_drc(lmb->drc_index); + if (rc) + break; + + rc = dlpar_add_lmb(lmb); + if (rc) { + dlpar_release_drc(lmb->drc_index); + break; + } + + drmem_mark_lmb_reserved(lmb); + } + + if (rc) { + pr_err("Memory indexed-count-add failed, removing any added LMBs\n"); + + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + rc = dlpar_remove_lmb(lmb); + if (rc) + pr_err("Failed to remove LMB, drc index %x\n", + lmb->drc_index); + else + dlpar_release_drc(lmb->drc_index); + + drmem_remove_lmb_reservation(lmb); + } + rc = -EINVAL; + } else { + for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + if (!drmem_lmb_reserved(lmb)) + continue; + + pr_info("Memory at %llx (drc index %x) was hot-added\n", + lmb->base_addr, lmb->drc_index); + drmem_remove_lmb_reservation(lmb); + } + } + + return rc; +} + +int dlpar_memory(struct pseries_hp_errorlog *hp_elog) +{ + u32 count, drc_index; + int rc; + + lock_device_hotplug(); + + switch (hp_elog->action) { + case PSERIES_HP_ELOG_ACTION_ADD: + switch (hp_elog->id_type) { + case PSERIES_HP_ELOG_ID_DRC_COUNT: + count = hp_elog->_drc_u.drc_count; + rc = dlpar_memory_add_by_count(count); + break; + case PSERIES_HP_ELOG_ID_DRC_INDEX: + drc_index = hp_elog->_drc_u.drc_index; + rc = dlpar_memory_add_by_index(drc_index); + break; + case PSERIES_HP_ELOG_ID_DRC_IC: + count = hp_elog->_drc_u.ic.count; + drc_index = hp_elog->_drc_u.ic.index; + rc = dlpar_memory_add_by_ic(count, drc_index); + break; + default: + rc = -EINVAL; + break; + } + + break; + case PSERIES_HP_ELOG_ACTION_REMOVE: + switch (hp_elog->id_type) { + case PSERIES_HP_ELOG_ID_DRC_COUNT: + count = hp_elog->_drc_u.drc_count; + rc = dlpar_memory_remove_by_count(count); + break; + case PSERIES_HP_ELOG_ID_DRC_INDEX: + drc_index = hp_elog->_drc_u.drc_index; + rc = dlpar_memory_remove_by_index(drc_index); + break; + case PSERIES_HP_ELOG_ID_DRC_IC: + count = hp_elog->_drc_u.ic.count; + drc_index = hp_elog->_drc_u.ic.index; + rc = dlpar_memory_remove_by_ic(count, drc_index); + break; + default: + rc = -EINVAL; + break; + } + + break; + default: + pr_err("Invalid action (%d) specified\n", hp_elog->action); + rc = -EINVAL; + break; + } + + if (!rc) + rc = drmem_update_dt(); + + unlock_device_hotplug(); + return rc; +} + +static int pseries_add_mem_node(struct device_node *np) +{ + int ret; + struct resource res; + + /* + * Check to see if we are actually adding memory + */ + if (!of_node_is_type(np, "memory")) + return 0; + + /* + * Find the base and size of the memblock + */ + ret = of_address_to_resource(np, 0, &res); + if (ret) + return ret; + + /* + * Update memory region to represent the memory add + */ + ret = memblock_add(res.start, resource_size(&res)); + return (ret < 0) ? -EINVAL : 0; +} + +static int pseries_memory_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + int err = 0; + + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + err = pseries_add_mem_node(rd->dn); + break; + case OF_RECONFIG_DETACH_NODE: + err = pseries_remove_mem_node(rd->dn); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + if (!strcmp(rd->dn->name, + "ibm,dynamic-reconfiguration-memory")) + drmem_update_lmbs(rd->prop); + } + return notifier_from_errno(err); +} + +static struct notifier_block pseries_mem_nb = { + .notifier_call = pseries_memory_notifier, +}; + +static int __init pseries_memory_hotplug_init(void) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_mem_nb); + + return 0; +} +machine_device_initcall(pseries, pseries_memory_hotplug_init); diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S new file mode 100644 index 0000000000..2b0cac6fb6 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -0,0 +1,370 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains the generic code to perform a call to the + * pSeries LPAR hypervisor. + */ +#include +#include +#include +#include +#include +#include +#include + + .section ".text" + +#ifdef CONFIG_TRACEPOINTS + +#ifndef CONFIG_JUMP_LABEL + .data + + .globl hcall_tracepoint_refcount +hcall_tracepoint_refcount: + .8byte 0 + + .section ".text" +#endif + +/* + * precall must preserve all registers. use unused STK_PARAM() + * areas to save snapshots and opcode. STK_PARAM() in the caller's + * frame will be available even on ELFv2 because these are all + * variadic functions. + */ +#define HCALL_INST_PRECALL(FIRST_REG) \ + mflr r0; \ + std r3,STK_PARAM(R3)(r1); \ + std r4,STK_PARAM(R4)(r1); \ + std r5,STK_PARAM(R5)(r1); \ + std r6,STK_PARAM(R6)(r1); \ + std r7,STK_PARAM(R7)(r1); \ + std r8,STK_PARAM(R8)(r1); \ + std r9,STK_PARAM(R9)(r1); \ + std r10,STK_PARAM(R10)(r1); \ + std r0,16(r1); \ + addi r4,r1,STK_PARAM(FIRST_REG); \ + stdu r1,-STACK_FRAME_MIN_SIZE(r1); \ + bl CFUNC(__trace_hcall_entry); \ + ld r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + ld r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1); \ + ld r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1); \ + ld r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1); \ + ld r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1); \ + ld r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1); \ + ld r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1); \ + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1) + +/* + * postcall is performed immediately before function return which + * allows liberal use of volatile registers. + */ +#define __HCALL_INST_POSTCALL \ + ld r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + std r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \ + mr r4,r3; \ + mr r3,r0; \ + bl CFUNC(__trace_hcall_exit); \ + ld r0,STACK_FRAME_MIN_SIZE+16(r1); \ + addi r1,r1,STACK_FRAME_MIN_SIZE; \ + ld r3,STK_PARAM(R3)(r1); \ + mtlr r0 + +#define HCALL_INST_POSTCALL_NORETS \ + li r5,0; \ + __HCALL_INST_POSTCALL + +#define HCALL_INST_POSTCALL(BUFREG) \ + mr r5,BUFREG; \ + __HCALL_INST_POSTCALL + +#ifdef CONFIG_JUMP_LABEL +#define HCALL_BRANCH(LABEL) \ + ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) +#else + +/* + * We branch around this in early init (eg when populating the MMU + * hashtable) by using an unconditional cpu feature. + */ +#define HCALL_BRANCH(LABEL) \ +BEGIN_FTR_SECTION; \ + b 1f; \ +END_FTR_SECTION(0, 1); \ + LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \ + ld r12,0(r12); \ + cmpdi r12,0; \ + bne- LABEL; \ +1: +#endif + +#else +#define HCALL_INST_PRECALL(FIRST_ARG) +#define HCALL_INST_POSTCALL_NORETS +#define HCALL_INST_POSTCALL(BUFREG) +#define HCALL_BRANCH(LABEL) +#endif + +_GLOBAL_TOC(plpar_hcall_norets_notrace) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HVSC /* invoke the hypervisor */ + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + +_GLOBAL_TOC(plpar_hcall_norets) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HCALL_BRANCH(plpar_hcall_norets_trace) + HVSC /* invoke the hypervisor */ + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_norets_trace: + HCALL_INST_PRECALL(R4) + HVSC + HCALL_INST_POSTCALL_NORETS + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + blr +#endif + +_GLOBAL_TOC(plpar_hcall) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_BRANCH(plpar_hcall_trace) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC /* invoke the hypervisor */ + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall_trace: + HCALL_INST_PRECALL(R5) + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC + + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + + HCALL_INST_POSTCALL(r12) + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr +#endif + +/* + * plpar_hcall_raw can be called in real mode. kexec/kdump need some + * hypervisor calls to be executed in real mode. So plpar_hcall_raw + * does not access the per cpu hypervisor call statistics variables, + * since these variables may not be present in the RMO region. + */ +_GLOBAL(plpar_hcall_raw) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + + HVSC /* invoke the hypervisor */ + + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +_GLOBAL_TOC(plpar_hcall9) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + HCALL_BRANCH(plpar_hcall9_trace) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STK_PARAM(R11)(r1) /* put arg7 in R10 */ + ld r11,STK_PARAM(R12)(r1) /* put arg8 in R11 */ + ld r12,STK_PARAM(R13)(r1) /* put arg9 in R12 */ + + HVSC /* invoke the hypervisor */ + + mr r0,r12 + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + std r10,48(r12) + std r11,56(r12) + std r0, 64(r12) + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ + +#ifdef CONFIG_TRACEPOINTS +plpar_hcall9_trace: + HCALL_INST_PRECALL(R5) + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1) + ld r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1) + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1) + + HVSC + + mr r0,r12 + ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1) + std r4,0(r12) + std r5,8(r12) + std r6,16(r12) + std r7,24(r12) + std r8,32(r12) + std r9,40(r12) + std r10,48(r12) + std r11,56(r12) + std r0,64(r12) + + HCALL_INST_POSTCALL(r12) + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr +#endif + +/* See plpar_hcall_raw to see why this is needed */ +_GLOBAL(plpar_hcall9_raw) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + + std r4,STK_PARAM(R4)(r1) /* Save ret buffer */ + + mr r4,r5 + mr r5,r6 + mr r6,r7 + mr r7,r8 + mr r8,r9 + mr r9,r10 + ld r10,STK_PARAM(R11)(r1) /* put arg7 in R10 */ + ld r11,STK_PARAM(R12)(r1) /* put arg8 in R11 */ + ld r12,STK_PARAM(R13)(r1) /* put arg9 in R12 */ + + HVSC /* invoke the hypervisor */ + + mr r0,r12 + ld r12,STK_PARAM(R4)(r1) + std r4, 0(r12) + std r5, 8(r12) + std r6, 16(r12) + std r7, 24(r12) + std r8, 32(r12) + std r9, 40(r12) + std r10,48(r12) + std r11,56(r12) + std r0, 64(r12) + + li r4,0 + stb r4,PACASRR_VALID(r13) + + lwz r0,8(r1) + mtcrf 0xff,r0 + + blr /* return r3 = status */ diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c new file mode 100644 index 0000000000..3a50612a78 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2006 Mike Kravetz IBM Corporation + * + * Hypervisor Call Instrumentation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* For hcall instrumentation. One structure per-hcall, per-CPU */ +struct hcall_stats { + unsigned long num_calls; /* number of calls (on this CPU) */ + unsigned long tb_total; /* total wall time (mftb) of calls. */ + unsigned long purr_total; /* total cpu time (PURR) of calls. */ + unsigned long tb_start; + unsigned long purr_start; +}; +#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) + +static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); + +/* + * Routines for displaying the statistics in debugfs + */ +static void *hc_start(struct seq_file *m, loff_t *pos) +{ + if ((int)*pos < (HCALL_STAT_ARRAY_SIZE-1)) + return (void *)(unsigned long)(*pos + 1); + + return NULL; +} + +static void *hc_next(struct seq_file *m, void *p, loff_t * pos) +{ + ++*pos; + + return hc_start(m, pos); +} + +static void hc_stop(struct seq_file *m, void *p) +{ +} + +static int hc_show(struct seq_file *m, void *p) +{ + unsigned long h_num = (unsigned long)p; + struct hcall_stats *hs = m->private; + + if (hs[h_num].num_calls) { + if (cpu_has_feature(CPU_FTR_PURR)) + seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2, + hs[h_num].num_calls, + hs[h_num].tb_total, + hs[h_num].purr_total); + else + seq_printf(m, "%lu %lu %lu\n", h_num<<2, + hs[h_num].num_calls, + hs[h_num].tb_total); + } + + return 0; +} + +static const struct seq_operations hcall_inst_sops = { + .start = hc_start, + .next = hc_next, + .stop = hc_stop, + .show = hc_show +}; + +DEFINE_SEQ_ATTRIBUTE(hcall_inst); + +#define HCALL_ROOT_DIR "hcall_inst" +#define CPU_NAME_BUF_SIZE 32 + + +static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long *args) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h->tb_start = mftb(); + h->purr_start = mfspr(SPRN_PURR); +} + +static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval, + unsigned long *retbuf) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h->num_calls++; + h->tb_total += mftb() - h->tb_start; + h->purr_total += mfspr(SPRN_PURR) - h->purr_start; +} + +static int __init hcall_inst_init(void) +{ + struct dentry *hcall_root; + char cpu_name_buf[CPU_NAME_BUF_SIZE]; + int cpu; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + if (register_trace_hcall_entry(probe_hcall_entry, NULL)) + return -EINVAL; + + if (register_trace_hcall_exit(probe_hcall_exit, NULL)) { + unregister_trace_hcall_entry(probe_hcall_entry, NULL); + return -EINVAL; + } + + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); + + for_each_possible_cpu(cpu) { + snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu); + debugfs_create_file(cpu_name_buf, 0444, hcall_root, + per_cpu(hcall_stats, cpu), + &hcall_inst_fops); + } + + return 0; +} +machine_device_initcall(pseries, hcall_inst_init); diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c new file mode 100644 index 0000000000..1ac52963e0 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * hvconsole.c + * Copyright (C) 2004 Hollis Blanchard, IBM Corporation + * Copyright (C) 2004 IBM Corporation + * + * Additional Author(s): + * Ryan S. Arnold + * + * LPAR console support. + */ + +#include +#include +#include +#include +#include +#include + +/** + * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter + * @vtermno: The vtermno or unit_address of the adapter from which to fetch the + * data. + * @buf: The character buffer into which to put the character data fetched from + * firmware. + * @count: not used? + */ +int hvc_get_chars(uint32_t vtermno, char *buf, int count) +{ + long ret; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned long *lbuf = (unsigned long *)buf; + + ret = plpar_hcall(H_GET_TERM_CHAR, retbuf, vtermno); + lbuf[0] = be64_to_cpu(retbuf[1]); + lbuf[1] = be64_to_cpu(retbuf[2]); + + if (ret == H_SUCCESS) + return retbuf[0]; + + return 0; +} + +EXPORT_SYMBOL(hvc_get_chars); + + +/** + * hvc_put_chars: send characters to firmware for denoted vterm adapter + * @vtermno: The vtermno or unit_address of the adapter from which the data + * originated. + * @buf: The character buffer that contains the character data to send to + * firmware. Must be at least 16 bytes, even if count is less than 16. + * @count: Send this number of characters. + */ +int hvc_put_chars(uint32_t vtermno, const char *buf, int count) +{ + unsigned long *lbuf = (unsigned long *) buf; + long ret; + + + /* hcall will ret H_PARAMETER if 'count' exceeds firmware max.*/ + if (count > MAX_VIO_PUT_CHARS) + count = MAX_VIO_PUT_CHARS; + + ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, + cpu_to_be64(lbuf[0]), + cpu_to_be64(lbuf[1])); + if (ret == H_SUCCESS) + return count; + if (ret == H_BUSY) + return -EAGAIN; + return -EIO; +} + +EXPORT_SYMBOL(hvc_put_chars); diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c new file mode 100644 index 0000000000..d48c9c7ce1 --- /dev/null +++ b/arch/powerpc/platforms/pseries/hvcserver.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * hvcserver.c + * Copyright (C) 2004 Ryan S Arnold, IBM Corporation + * + * PPC64 virtual I/O console server support. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define HVCS_ARCH_VERSION "1.0.0" + +MODULE_AUTHOR("Ryan S. Arnold "); +MODULE_DESCRIPTION("IBM hvcs ppc64 API"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(HVCS_ARCH_VERSION); + +/* + * Convert arch specific return codes into relevant errnos. The hvcs + * functions aren't performance sensitive, so this conversion isn't an + * issue. + */ +static int hvcs_convert(long to_convert) +{ + switch (to_convert) { + case H_SUCCESS: + return 0; + case H_PARAMETER: + return -EINVAL; + case H_HARDWARE: + return -EIO; + case H_BUSY: + case H_LONG_BUSY_ORDER_1_MSEC: + case H_LONG_BUSY_ORDER_10_MSEC: + case H_LONG_BUSY_ORDER_100_MSEC: + case H_LONG_BUSY_ORDER_1_SEC: + case H_LONG_BUSY_ORDER_10_SEC: + case H_LONG_BUSY_ORDER_100_SEC: + return -EBUSY; + case H_FUNCTION: + default: + return -EPERM; + } +} + +/** + * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info + * @head: list_head pointer for an allocated list of partner info structs to + * free. + * + * This function is used to free the partner info list that was returned by + * calling hvcs_get_partner_info(). + */ +int hvcs_free_partner_info(struct list_head *head) +{ + struct hvcs_partner_info *pi; + struct list_head *element; + + if (!head) + return -EINVAL; + + while (!list_empty(head)) { + element = head->next; + pi = list_entry(element, struct hvcs_partner_info, node); + list_del(element); + kfree(pi); + } + + return 0; +} +EXPORT_SYMBOL(hvcs_free_partner_info); + +/* Helper function for hvcs_get_partner_info */ +static int hvcs_next_partner(uint32_t unit_address, + unsigned long last_p_partition_ID, + unsigned long last_p_unit_address, unsigned long *pi_buff) + +{ + long retval; + retval = plpar_hcall_norets(H_VTERM_PARTNER_INFO, unit_address, + last_p_partition_ID, + last_p_unit_address, virt_to_phys(pi_buff)); + return hvcs_convert(retval); +} + +/** + * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter + * @unit_address: The unit_address of the vty-server adapter for which this + * function is fetching partner info. + * @head: An initialized list_head pointer to an empty list to use to return the + * list of partner info fetched from the hypervisor to the caller. + * @pi_buff: A page sized buffer pre-allocated prior to calling this function + * that is to be used to be used by firmware as an iterator to keep track + * of the partner info retrieval. + * + * This function returns non-zero on success, or if there is no partner info. + * + * The pi_buff is pre-allocated prior to calling this function because this + * function may be called with a spin_lock held and kmalloc of a page is not + * recommended as GFP_ATOMIC. + * + * The first long of this buffer is used to store a partner unit address. The + * second long is used to store a partner partition ID and starting at + * pi_buff[2] is the 79 character Converged Location Code (diff size than the + * unsigned longs, hence the casting mumbo jumbo you see later). + * + * Invocation of this function should always be followed by an invocation of + * hvcs_free_partner_info() using a pointer to the SAME list head instance + * that was passed as a parameter to this function. + */ +int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head, + unsigned long *pi_buff) +{ + /* + * Dealt with as longs because of the hcall interface even though the + * values are uint32_t. + */ + unsigned long last_p_partition_ID; + unsigned long last_p_unit_address; + struct hvcs_partner_info *next_partner_info = NULL; + int more = 1; + int retval; + + /* invalid parameters */ + if (!head || !pi_buff) + return -EINVAL; + + memset(pi_buff, 0x00, PAGE_SIZE); + last_p_partition_ID = last_p_unit_address = ~0UL; + INIT_LIST_HEAD(head); + + do { + retval = hvcs_next_partner(unit_address, last_p_partition_ID, + last_p_unit_address, pi_buff); + if (retval) { + /* + * Don't indicate that we've failed if we have + * any list elements. + */ + if (!list_empty(head)) + return 0; + return retval; + } + + last_p_partition_ID = be64_to_cpu(pi_buff[0]); + last_p_unit_address = be64_to_cpu(pi_buff[1]); + + /* This indicates that there are no further partners */ + if (last_p_partition_ID == ~0UL + && last_p_unit_address == ~0UL) + break; + + /* This is a very small struct and will be freed soon in + * hvcs_free_partner_info(). */ + next_partner_info = kmalloc(sizeof(struct hvcs_partner_info), + GFP_ATOMIC); + + if (!next_partner_info) { + printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to" + " allocate partner info struct.\n"); + hvcs_free_partner_info(head); + return -ENOMEM; + } + + next_partner_info->unit_address + = (unsigned int)last_p_unit_address; + next_partner_info->partition_ID + = (unsigned int)last_p_partition_ID; + + /* copy the Null-term char too */ + strscpy(&next_partner_info->location_code[0], + (char *)&pi_buff[2], + sizeof(next_partner_info->location_code)); + + list_add_tail(&(next_partner_info->node), head); + next_partner_info = NULL; + + } while (more); + + return 0; +} +EXPORT_SYMBOL(hvcs_get_partner_info); + +/** + * hvcs_register_connection - establish a connection between this vty-server and + * a vty. + * @unit_address: The unit address of the vty-server adapter that is to be + * establish a connection. + * @p_partition_ID: The partition ID of the vty adapter that is to be connected. + * @p_unit_address: The unit address of the vty adapter to which the vty-server + * is to be connected. + * + * If this function is called once and -EINVAL is returned it may + * indicate that the partner info needs to be refreshed for the + * target unit address at which point the caller must invoke + * hvcs_get_partner_info() and then call this function again. If, + * for a second time, -EINVAL is returned then it indicates that + * there is probably already a partner connection registered to a + * different vty-server adapter. It is also possible that a second + * -EINVAL may indicate that one of the parms is not valid, for + * instance if the link was removed between the vty-server adapter + * and the vty adapter that you are trying to open. Don't shoot the + * messenger. Firmware implemented it this way. + */ +int hvcs_register_connection( uint32_t unit_address, + uint32_t p_partition_ID, uint32_t p_unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_REGISTER_VTERM, unit_address, + p_partition_ID, p_unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_register_connection); + +/** + * hvcs_free_connection - free the connection between a vty-server and vty + * @unit_address: The unit address of the vty-server that is to have its + * connection severed. + * + * This function is used to free the partner connection between a vty-server + * adapter and a vty adapter. + * + * If -EBUSY is returned continue to call this function until 0 is returned. + */ +int hvcs_free_connection(uint32_t unit_address) +{ + long retval; + retval = plpar_hcall_norets(H_FREE_VTERM, unit_address); + return hvcs_convert(retval); +} +EXPORT_SYMBOL(hvcs_free_connection); diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c new file mode 100644 index 0000000000..998e3aff24 --- /dev/null +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -0,0 +1,479 @@ +/* + * IBM PowerPC IBM eBus Infrastructure Support. + * + * Copyright (c) 2005 IBM Corporation + * Joachim Fenkes + * Heiko J Schick + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct device ibmebus_bus_device = { /* fake "parent" device */ + .init_name = "ibmebus", +}; + +struct bus_type ibmebus_bus_type; + +/* These devices will automatically be added to the bus during init */ +static const struct of_device_id ibmebus_matches[] __initconst = { + { .compatible = "IBM,lhca" }, + { .compatible = "IBM,lhea" }, + {}, +}; + +static void *ibmebus_alloc_coherent(struct device *dev, + size_t size, + dma_addr_t *dma_handle, + gfp_t flag, + unsigned long attrs) +{ + void *mem; + + mem = kmalloc(size, flag); + *dma_handle = (dma_addr_t)mem; + + return mem; +} + +static void ibmebus_free_coherent(struct device *dev, + size_t size, void *vaddr, + dma_addr_t dma_handle, + unsigned long attrs) +{ + kfree(vaddr); +} + +static dma_addr_t ibmebus_map_page(struct device *dev, + struct page *page, + unsigned long offset, + size_t size, + enum dma_data_direction direction, + unsigned long attrs) +{ + return (dma_addr_t)(page_address(page) + offset); +} + +static void ibmebus_unmap_page(struct device *dev, + dma_addr_t dma_addr, + size_t size, + enum dma_data_direction direction, + unsigned long attrs) +{ + return; +} + +static int ibmebus_map_sg(struct device *dev, + struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = (dma_addr_t) sg_virt(sg); + sg->dma_length = sg->length; + } + + return nents; +} + +static void ibmebus_unmap_sg(struct device *dev, + struct scatterlist *sg, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return; +} + +static int ibmebus_dma_supported(struct device *dev, u64 mask) +{ + return mask == DMA_BIT_MASK(64); +} + +static u64 ibmebus_dma_get_required_mask(struct device *dev) +{ + return DMA_BIT_MASK(64); +} + +static const struct dma_map_ops ibmebus_dma_ops = { + .alloc = ibmebus_alloc_coherent, + .free = ibmebus_free_coherent, + .map_sg = ibmebus_map_sg, + .unmap_sg = ibmebus_unmap_sg, + .dma_supported = ibmebus_dma_supported, + .get_required_mask = ibmebus_dma_get_required_mask, + .map_page = ibmebus_map_page, + .unmap_page = ibmebus_unmap_page, +}; + +static int ibmebus_match_path(struct device *dev, const void *data) +{ + struct device_node *dn = to_platform_device(dev)->dev.of_node; + struct device_node *tn = of_find_node_by_path(data); + + of_node_put(tn); + + return (tn == dn); +} + +static int ibmebus_match_node(struct device *dev, const void *data) +{ + return to_platform_device(dev)->dev.of_node == data; +} + +static int ibmebus_create_device(struct device_node *dn) +{ + struct platform_device *dev; + int ret; + + dev = of_device_alloc(dn, NULL, &ibmebus_bus_device); + if (!dev) + return -ENOMEM; + + dev->dev.bus = &ibmebus_bus_type; + dev->dev.dma_ops = &ibmebus_dma_ops; + + ret = of_device_add(dev); + if (ret) + platform_device_put(dev); + return ret; +} + +static int ibmebus_create_devices(const struct of_device_id *matches) +{ + struct device_node *root, *child; + struct device *dev; + int ret = 0; + + root = of_find_node_by_path("/"); + + for_each_child_of_node(root, child) { + if (!of_match_node(matches, child)) + continue; + + dev = bus_find_device(&ibmebus_bus_type, NULL, child, + ibmebus_match_node); + if (dev) { + put_device(dev); + continue; + } + + ret = ibmebus_create_device(child); + if (ret) { + printk(KERN_ERR "%s: failed to create device (%i)", + __func__, ret); + of_node_put(child); + break; + } + } + + of_node_put(root); + return ret; +} + +int ibmebus_register_driver(struct platform_driver *drv) +{ + /* If the driver uses devices that ibmebus doesn't know, add them */ + ibmebus_create_devices(drv->driver.of_match_table); + + drv->driver.bus = &ibmebus_bus_type; + return driver_register(&drv->driver); +} +EXPORT_SYMBOL(ibmebus_register_driver); + +void ibmebus_unregister_driver(struct platform_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(ibmebus_unregister_driver); + +int ibmebus_request_irq(u32 ist, irq_handler_t handler, + unsigned long irq_flags, const char *devname, + void *dev_id) +{ + unsigned int irq = irq_create_mapping(NULL, ist); + + if (!irq) + return -EINVAL; + + return request_irq(irq, handler, irq_flags, devname, dev_id); +} +EXPORT_SYMBOL(ibmebus_request_irq); + +void ibmebus_free_irq(u32 ist, void *dev_id) +{ + unsigned int irq = irq_find_mapping(NULL, ist); + + free_irq(irq, dev_id); + irq_dispose_mapping(irq); +} +EXPORT_SYMBOL(ibmebus_free_irq); + +static char *ibmebus_chomp(const char *in, size_t count) +{ + char *out = kmalloc(count + 1, GFP_KERNEL); + + if (!out) + return NULL; + + memcpy(out, in, count); + out[count] = '\0'; + if (out[count - 1] == '\n') + out[count - 1] = '\0'; + + return out; +} + +static ssize_t probe_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct device_node *dn = NULL; + struct device *dev; + char *path; + ssize_t rc = 0; + + path = ibmebus_chomp(buf, count); + if (!path) + return -ENOMEM; + + dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path); + if (dev) { + put_device(dev); + printk(KERN_WARNING "%s: %s has already been probed\n", + __func__, path); + rc = -EEXIST; + goto out; + } + + if ((dn = of_find_node_by_path(path))) { + rc = ibmebus_create_device(dn); + of_node_put(dn); + } else { + printk(KERN_WARNING "%s: no such device node: %s\n", + __func__, path); + rc = -ENODEV; + } + +out: + kfree(path); + if (rc) + return rc; + return count; +} +static BUS_ATTR_WO(probe); + +static ssize_t remove_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct device *dev; + char *path; + + path = ibmebus_chomp(buf, count); + if (!path) + return -ENOMEM; + + if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path, + ibmebus_match_path))) { + of_device_unregister(to_platform_device(dev)); + put_device(dev); + + kfree(path); + return count; + } else { + printk(KERN_WARNING "%s: %s not on the bus\n", + __func__, path); + + kfree(path); + return -ENODEV; + } +} +static BUS_ATTR_WO(remove); + +static struct attribute *ibmbus_bus_attrs[] = { + &bus_attr_probe.attr, + &bus_attr_remove.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ibmbus_bus); + +static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct of_device_id *matches = drv->of_match_table; + + if (!matches) + return 0; + + return of_match_device(matches, dev) != NULL; +} + +static int ibmebus_bus_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct platform_driver *drv; + struct platform_device *of_dev; + + drv = to_platform_driver(dev->driver); + of_dev = to_platform_device(dev); + + if (!drv->probe) + return error; + + get_device(dev); + + if (of_driver_match_device(dev, dev->driver)) + error = drv->probe(of_dev); + if (error) + put_device(dev); + + return error; +} + +static void ibmebus_bus_device_remove(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct platform_driver *drv = to_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); +} + +static void ibmebus_bus_device_shutdown(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct platform_driver *drv = to_platform_driver(dev->driver); + + if (dev->driver && drv->shutdown) + drv->shutdown(of_dev); +} + +/* + * ibmebus_bus_device_attrs + */ +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%pOF\n", ofdev->dev.of_node); +} +static DEVICE_ATTR_RO(devspec); + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%pOFn\n", ofdev->dev.of_node); +} +static DEVICE_ATTR_RO(name); + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return of_device_modalias(dev, buf, PAGE_SIZE); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *ibmebus_bus_device_attrs[] = { + &dev_attr_devspec.attr, + &dev_attr_name.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ibmebus_bus_device); + +static int ibmebus_bus_modalias(const struct device *dev, struct kobj_uevent_env *env) +{ + return of_device_uevent_modalias(dev, env); +} + +struct bus_type ibmebus_bus_type = { + .name = "ibmebus", + .uevent = ibmebus_bus_modalias, + .bus_groups = ibmbus_bus_groups, + .match = ibmebus_bus_bus_match, + .probe = ibmebus_bus_device_probe, + .remove = ibmebus_bus_device_remove, + .shutdown = ibmebus_bus_device_shutdown, + .dev_groups = ibmebus_bus_device_groups, +}; +EXPORT_SYMBOL(ibmebus_bus_type); + +static int __init ibmebus_bus_init(void) +{ + int err; + + printk(KERN_INFO "IBM eBus Device Driver\n"); + + err = bus_register(&ibmebus_bus_type); + if (err) { + printk(KERN_ERR "%s: failed to register IBM eBus.\n", + __func__); + return err; + } + + err = device_register(&ibmebus_bus_device); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __func__, err); + put_device(&ibmebus_bus_device); + bus_unregister(&ibmebus_bus_type); + + return err; + } + + err = ibmebus_create_devices(ibmebus_matches); + if (err) { + device_unregister(&ibmebus_bus_device); + bus_unregister(&ibmebus_bus_type); + return err; + } + + return 0; +} +machine_postcore_initcall(pseries, ibmebus_bus_init); diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c new file mode 100644 index 0000000000..f411d4fe7b --- /dev/null +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pseries.h" + +/* + * IO event interrupt is a mechanism provided by RTAS to return + * information about hardware error and non-error events. Device + * drivers can register their event handlers to receive events. + * Device drivers are expected to use atomic_notifier_chain_register() + * and atomic_notifier_chain_unregister() to register and unregister + * their event handlers. Since multiple IO event types and scopes + * share an IO event interrupt, the event handlers are called one + * by one until the IO event is claimed by one of the handlers. + * The event handlers are expected to return NOTIFY_OK if the + * event is handled by the event handler or NOTIFY_DONE if the + * event does not belong to the handler. + * + * Usage: + * + * Notifier function: + * #include + * int event_handler(struct notifier_block *nb, unsigned long val, void *data) { + * p = (struct pseries_io_event_sect_data *) data; + * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE; + * : + * : + * return NOTIFY_OK; + * } + * struct notifier_block event_nb = { + * .notifier_call = event_handler, + * } + * + * Registration: + * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb); + * + * Unregistration: + * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb); + */ + +ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list); +EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list); + +static int ioei_check_exception_token; + +static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; + +/** + * Find the data portion of an IO Event section from event log. + * @elog: RTAS error/event log. + * + * Return: + * pointer to a valid IO event section data. NULL if not found. + */ +static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) +{ + struct pseries_errorlog *sect; + + /* We should only ever get called for io-event interrupts, but if + * we do get called for another type then something went wrong so + * make some noise about it. + * RTAS_TYPE_IO only exists in extended event log version 6 or later. + * No need to check event log version. + */ + if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) { + printk_once(KERN_WARNING"io_event_irq: Unexpected event type %d", + rtas_error_type(elog)); + return NULL; + } + + sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT); + if (unlikely(!sect)) { + printk_once(KERN_WARNING "io_event_irq: RTAS extended event " + "log does not contain an IO Event section. " + "Could be a bug in system firmware!\n"); + return NULL; + } + return (struct pseries_io_event *) §->data; +} + +/* + * PAPR: + * - check-exception returns the first found error or event and clear that + * error or event so it is reported once. + * - Each interrupt returns one event. If a plateform chooses to report + * multiple events through a single interrupt, it must ensure that the + * interrupt remains asserted until check-exception has been used to + * process all out-standing events for that interrupt. + * + * Implementation notes: + * - Events must be processed in the order they are returned. Hence, + * sequential in nature. + * - The owner of an event is determined by combinations of scope, + * event type, and sub-type. There is no easy way to pre-sort clients + * by scope or event type alone. For example, Torrent ISR route change + * event is reported with scope 0x00 (Not Applicable) rather than + * 0x3B (Torrent-hub). It is better to let the clients to identify + * who owns the event. + */ + +static irqreturn_t ioei_interrupt(int irq, void *dev_id) +{ + struct pseries_io_event *event; + int rtas_rc; + + for (;;) { + rtas_rc = rtas_call(ioei_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_IO_EVENTS, 1 /* Time Critical */, + __pa(ioei_rtas_buf), + RTAS_DATA_BUF_SIZE); + if (rtas_rc != 0) + break; + + event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf); + if (!event) + continue; + + atomic_notifier_call_chain(&pseries_ioei_notifier_list, + 0, event); + } + return IRQ_HANDLED; +} + +static int __init ioei_init(void) +{ + struct device_node *np; + + ioei_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); + if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + np = of_find_node_by_path("/event-sources/ibm,io-events"); + if (np) { + request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT"); + pr_info("IBM I/O event interrupts enabled\n"); + of_node_put(np); + } else { + return -ENODEV; + } + return 0; +} +machine_subsys_initcall(pseries, ioei_init); + diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c new file mode 100644 index 0000000000..496e16c588 --- /dev/null +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -0,0 +1,1742 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation + * + * Rewrite, cleanup: + * + * Copyright (C) 2004 Olof Johansson , IBM Corporation + * Copyright (C) 2006 Olof Johansson + * + * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +enum { + DDW_QUERY_PE_DMA_WIN = 0, + DDW_CREATE_PE_DMA_WIN = 1, + DDW_REMOVE_PE_DMA_WIN = 2, + + DDW_APPLICABLE_SIZE +}; + +enum { + DDW_EXT_SIZE = 0, + DDW_EXT_RESET_DMA_WIN = 1, + DDW_EXT_QUERY_OUT_SIZE = 2 +}; + +static struct iommu_table *iommu_pseries_alloc_table(int node) +{ + struct iommu_table *tbl; + + tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node); + if (!tbl) + return NULL; + + INIT_LIST_HEAD_RCU(&tbl->it_group_list); + kref_init(&tbl->it_kref); + return tbl; +} + +static struct iommu_table_group *iommu_pseries_alloc_group(int node) +{ + struct iommu_table_group *table_group; + + table_group = kzalloc_node(sizeof(*table_group), GFP_KERNEL, node); + if (!table_group) + return NULL; + +#ifdef CONFIG_IOMMU_API + table_group->ops = &spapr_tce_table_group_ops; + table_group->pgsizes = SZ_4K; +#endif + + table_group->tables[0] = iommu_pseries_alloc_table(node); + if (table_group->tables[0]) + return table_group; + + kfree(table_group); + return NULL; +} + +static void iommu_pseries_free_group(struct iommu_table_group *table_group, + const char *node_name) +{ + if (!table_group) + return; + +#ifdef CONFIG_IOMMU_API + if (table_group->group) { + iommu_group_put(table_group->group); + BUG_ON(table_group->group); + } +#endif + + /* Default DMA window table is at index 0, while DDW at 1. SR-IOV + * adapters only have table on index 1. + */ + if (table_group->tables[0]) + iommu_tce_table_put(table_group->tables[0]); + + if (table_group->tables[1]) + iommu_tce_table_put(table_group->tables[1]); + + kfree(table_group); +} + +static int tce_build_pSeries(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + unsigned long attrs) +{ + u64 proto_tce; + __be64 *tcep; + u64 rpn; + const unsigned long tceshift = tbl->it_page_shift; + const unsigned long pagesize = IOMMU_PAGE_SIZE(tbl); + + proto_tce = TCE_PCI_READ; // Read allowed + + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; + + tcep = ((__be64 *)tbl->it_base) + index; + + while (npages--) { + /* can't move this out since we might cross MEMBLOCK boundary */ + rpn = __pa(uaddr) >> tceshift; + *tcep = cpu_to_be64(proto_tce | rpn << tceshift); + + uaddr += pagesize; + tcep++; + } + return 0; +} + + +static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) +{ + __be64 *tcep; + + tcep = ((__be64 *)tbl->it_base) + index; + + while (npages--) + *(tcep++) = 0; +} + +static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) +{ + __be64 *tcep; + + tcep = ((__be64 *)tbl->it_base) + index; + + return be64_to_cpu(*tcep); +} + +static void tce_free_pSeriesLP(unsigned long liobn, long, long, long); +static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); + +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + unsigned long attrs) +{ + u64 rc = 0; + u64 proto_tce, tce; + u64 rpn; + int ret = 0; + long tcenum_start = tcenum, npages_start = npages; + + rpn = __pa(uaddr) >> tceshift; + proto_tce = TCE_PCI_READ; + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; + + while (npages--) { + tce = proto_tce | rpn << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_free_pSeriesLP(liobn, tcenum_start, tceshift, + (npages_start - (npages + 1))); + break; + } + + if (rc && printk_ratelimit()) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)liobn); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); + printk("\ttce val = 0x%llx\n", tce ); + dump_stack(); + } + + tcenum++; + rpn++; + } + return ret; +} + +static DEFINE_PER_CPU(__be64 *, tce_page); + +static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, + enum dma_data_direction direction, + unsigned long attrs) +{ + u64 rc = 0; + u64 proto_tce; + __be64 *tcep; + u64 rpn; + long l, limit; + long tcenum_start = tcenum, npages_start = npages; + int ret = 0; + unsigned long flags; + const unsigned long tceshift = tbl->it_page_shift; + + if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tceshift, npages, uaddr, + direction, attrs); + } + + local_irq_save(flags); /* to protect tcep and the page behind it */ + + tcep = __this_cpu_read(tce_page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() + */ + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) { + local_irq_restore(flags); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tceshift, + npages, uaddr, direction, attrs); + } + __this_cpu_write(tce_page, tcep); + } + + rpn = __pa(uaddr) >> tceshift; + proto_tce = TCE_PCI_READ; + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; + + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE); + + for (l = 0; l < limit; l++) { + tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift); + rpn++; + } + + rc = plpar_tce_put_indirect((u64)tbl->it_index, + (u64)tcenum << tceshift, + (u64)__pa(tcep), + limit); + + npages -= limit; + tcenum += limit; + } while (npages > 0 && !rc); + + local_irq_restore(flags); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; + tce_freemulti_pSeriesLP(tbl, tcenum_start, + (npages_start - (npages + limit))); + return ret; + } + + if (rc && printk_ratelimit()) { + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%llx\n", (u64)npages); + printk("\ttce[0] val = 0x%llx\n", tcep[0]); + dump_stack(); + } + return ret; +} + +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + long npages) +{ + u64 rc; + + while (npages--) { + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0); + + if (rc && printk_ratelimit()) { + printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)liobn); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); + dump_stack(); + } + + tcenum++; + } +} + + +static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +{ + u64 rc; + long rpages = npages; + unsigned long limit; + + if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) + return tce_free_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages); + + do { + limit = min_t(unsigned long, rpages, 512); + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, 0, limit); + + rpages -= limit; + tcenum += limit; + } while (rpages > 0 && !rc); + + if (rc && printk_ratelimit()) { + printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); + printk("\trc = %lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%llx\n", (u64)npages); + dump_stack(); + } +} + +static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) +{ + u64 rc; + unsigned long tce_ret; + + rc = plpar_tce_get((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, &tce_ret); + + if (rc && printk_ratelimit()) { + printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); + dump_stack(); + } + + return tce_ret; +} + +/* this is compatible with cells for the device tree property */ +struct dynamic_dma_window_prop { + __be32 liobn; /* tce table number */ + __be64 dma_base; /* address hi,lo */ + __be32 tce_shift; /* ilog2(tce_page_size) */ + __be32 window_shift; /* ilog2(tce_window_size) */ +}; + +struct dma_win { + struct device_node *device; + const struct dynamic_dma_window_prop *prop; + bool direct; + struct list_head list; +}; + +/* Dynamic DMA Window support */ +struct ddw_query_response { + u32 windows_available; + u64 largest_available_block; + u32 page_size; + u32 migration_capable; +}; + +struct ddw_create_response { + u32 liobn; + u32 addr_hi; + u32 addr_lo; +}; + +static LIST_HEAD(dma_win_list); +/* prevents races between memory on/offline and window creation */ +static DEFINE_SPINLOCK(dma_win_list_lock); +/* protects initializing window twice for same device */ +static DEFINE_MUTEX(dma_win_init_mutex); + +static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, + unsigned long num_pfn, const void *arg) +{ + const struct dynamic_dma_window_prop *maprange = arg; + int rc; + u64 tce_size, num_tce, dma_offset, next; + u32 tce_shift; + long limit; + + tce_shift = be32_to_cpu(maprange->tce_shift); + tce_size = 1ULL << tce_shift; + next = start_pfn << PAGE_SHIFT; + num_tce = num_pfn << PAGE_SHIFT; + + /* round back to the beginning of the tce page size */ + num_tce += next & (tce_size - 1); + next &= ~(tce_size - 1); + + /* covert to number of tces */ + num_tce |= tce_size - 1; + num_tce >>= tce_shift; + + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, num_tce, 512); + dma_offset = next + be64_to_cpu(maprange->dma_base); + + rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), + dma_offset, + 0, limit); + next += limit * tce_size; + num_tce -= limit; + } while (num_tce > 0 && !rc); + + return rc; +} + +static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + unsigned long num_pfn, const void *arg) +{ + const struct dynamic_dma_window_prop *maprange = arg; + u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; + __be64 *tcep; + u32 tce_shift; + u64 rc = 0; + long l, limit; + + if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + + local_irq_disable(); /* to protect tcep and the page behind it */ + tcep = __this_cpu_read(tce_page); + + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + if (!tcep) { + local_irq_enable(); + return -ENOMEM; + } + __this_cpu_write(tce_page, tcep); + } + + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; + + liobn = (u64)be32_to_cpu(maprange->liobn); + tce_shift = be32_to_cpu(maprange->tce_shift); + tce_size = 1ULL << tce_shift; + next = start_pfn << PAGE_SHIFT; + num_tce = num_pfn << PAGE_SHIFT; + + /* round back to the beginning of the tce page size */ + num_tce += next & (tce_size - 1); + next &= ~(tce_size - 1); + + /* covert to number of tces */ + num_tce |= tce_size - 1; + num_tce >>= tce_shift; + + /* We can map max one pageful of TCEs at a time */ + do { + /* + * Set up the page with TCE data, looping through and setting + * the values. + */ + limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE); + dma_offset = next + be64_to_cpu(maprange->dma_base); + + for (l = 0; l < limit; l++) { + tcep[l] = cpu_to_be64(proto_tce | next); + next += tce_size; + } + + rc = plpar_tce_put_indirect(liobn, + dma_offset, + (u64)__pa(tcep), + limit); + + num_tce -= limit; + } while (num_tce > 0 && !rc); + + /* error cleanup: caller will clear whole range */ + + local_irq_enable(); + return rc; +} + +static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn, + unsigned long num_pfn, void *arg) +{ + return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg); +} + +static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno, + unsigned long liobn, unsigned long win_addr, + unsigned long window_size, unsigned long page_shift, + void *base, struct iommu_table_ops *table_ops) +{ + tbl->it_busno = busno; + tbl->it_index = liobn; + tbl->it_offset = win_addr >> page_shift; + tbl->it_size = window_size >> page_shift; + tbl->it_page_shift = page_shift; + tbl->it_base = (unsigned long)base; + tbl->it_blocksize = 16; + tbl->it_type = TCE_PCI; + tbl->it_ops = table_ops; +} + +struct iommu_table_ops iommu_table_pseries_ops; + +static void iommu_table_setparms(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl) +{ + struct device_node *node; + const unsigned long *basep; + const u32 *sizep; + + /* Test if we are going over 2GB of DMA space */ + if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } + + node = phb->dn; + basep = of_get_property(node, "linux,tce-base", NULL); + sizep = of_get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) { + printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has " + "missing tce entries !\n", dn); + return; + } + + iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur, + phb->dma_window_size, IOMMU_PAGE_SHIFT_4K, + __va(*basep), &iommu_table_pseries_ops); + + if (!is_kdump_kernel()) + memset((void *)tbl->it_base, 0, *sizep); + + phb->dma_window_base_cur += phb->dma_window_size; +} + +struct iommu_table_ops iommu_table_lpar_multi_ops; + +/* + * iommu_table_setparms_lpar + * + * Function: On pSeries LPAR systems, return TCE table info, given a pci bus. + */ +static void iommu_table_setparms_lpar(struct pci_controller *phb, + struct device_node *dn, + struct iommu_table *tbl, + struct iommu_table_group *table_group, + const __be32 *dma_window) +{ + unsigned long offset, size, liobn; + + of_parse_dma_window(dn, dma_window, &liobn, &offset, &size); + + iommu_table_setparms_common(tbl, phb->bus->number, liobn, offset, size, IOMMU_PAGE_SHIFT_4K, NULL, + &iommu_table_lpar_multi_ops); + + + table_group->tce32_start = offset; + table_group->tce32_size = size; +} + +struct iommu_table_ops iommu_table_pseries_ops = { + .set = tce_build_pSeries, + .clear = tce_free_pSeries, + .get = tce_get_pseries +}; + +static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) +{ + struct device_node *dn; + struct iommu_table *tbl; + struct device_node *isa_dn, *isa_dn_orig; + struct device_node *tmp; + struct pci_dn *pci; + int children; + + dn = pci_bus_to_OF_node(bus); + + pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn); + + if (bus->self) { + /* This is not a root bus, any setup will be done for the + * device-side of the bridge in iommu_dev_setup_pSeries(). + */ + return; + } + pci = PCI_DN(dn); + + /* Check if the ISA bus on the system is under + * this PHB. + */ + isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); + + while (isa_dn && isa_dn != dn) + isa_dn = isa_dn->parent; + + of_node_put(isa_dn_orig); + + /* Count number of direct PCI children of the PHB. */ + for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) + children++; + + pr_debug("Children: %d\n", children); + + /* Calculate amount of DMA window per slot. Each window must be + * a power of two (due to pci_alloc_consistent requirements). + * + * Keep 256MB aside for PHBs with ISA. + */ + + if (!isa_dn) { + /* No ISA/IDE - just set window size and return */ + pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ + + while (pci->phb->dma_window_size * children > 0x80000000ul) + pci->phb->dma_window_size >>= 1; + pr_debug("No ISA/IDE, window size is 0x%llx\n", + pci->phb->dma_window_size); + pci->phb->dma_window_base_cur = 0; + + return; + } + + /* If we have ISA, then we probably have an IDE + * controller too. Allocate a 128MB table but + * skip the first 128MB to avoid stepping on ISA + * space. + */ + pci->phb->dma_window_size = 0x8000000ul; + pci->phb->dma_window_base_cur = 0x8000000ul; + + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; + + iommu_table_setparms(pci->phb, dn, tbl); + + if (!iommu_init_table(tbl, pci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + + /* Divide the rest (1.75GB) among the children */ + pci->phb->dma_window_size = 0x80000000ul; + while (pci->phb->dma_window_size * children > 0x70000000ul) + pci->phb->dma_window_size >>= 1; + + pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size); +} + +#ifdef CONFIG_IOMMU_API +static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned + long *tce, enum dma_data_direction *direction) +{ + long rc; + unsigned long ioba = (unsigned long) index << tbl->it_page_shift; + unsigned long flags, oldtce = 0; + u64 proto_tce = iommu_direction_to_tce_perm(*direction); + unsigned long newtce = *tce | proto_tce; + + spin_lock_irqsave(&tbl->large_pool.lock, flags); + + rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce); + if (!rc) + rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce); + + if (!rc) { + *direction = iommu_tce_direction(oldtce); + *tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); + } + + spin_unlock_irqrestore(&tbl->large_pool.lock, flags); + + return rc; +} +#endif + +struct iommu_table_ops iommu_table_lpar_multi_ops = { + .set = tce_buildmulti_pSeriesLP, +#ifdef CONFIG_IOMMU_API + .xchg_no_kill = tce_exchange_pseries, +#endif + .clear = tce_freemulti_pSeriesLP, + .get = tce_get_pSeriesLP +}; + +/* + * Find nearest ibm,dma-window (default DMA window) or direct DMA window or + * dynamic 64bit DMA window, walking up the device tree. + */ +static struct device_node *pci_dma_find(struct device_node *dn, + const __be32 **dma_window) +{ + const __be32 *dw = NULL; + + for ( ; dn && PCI_DN(dn); dn = dn->parent) { + dw = of_get_property(dn, "ibm,dma-window", NULL); + if (dw) { + if (dma_window) + *dma_window = dw; + return dn; + } + dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (dw) + return dn; + dw = of_get_property(dn, DMA64_PROPNAME, NULL); + if (dw) + return dn; + } + + return NULL; +} + +static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) +{ + struct iommu_table *tbl; + struct device_node *dn, *pdn; + struct pci_dn *ppci; + const __be32 *dma_window = NULL; + + dn = pci_bus_to_OF_node(bus); + + pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", + dn); + + pdn = pci_dma_find(dn, &dma_window); + + if (dma_window == NULL) + pr_debug(" no ibm,dma-window property !\n"); + + ppci = PCI_DN(pdn); + + pr_debug(" parent is %pOF, iommu_table: 0x%p\n", + pdn, ppci->table_group); + + if (!ppci->table_group) { + ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); + tbl = ppci->table_group->tables[0]; + if (dma_window) { + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, + ppci->table_group, dma_window); + + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + } + iommu_register_group(ppci->table_group, + pci_domain_nr(bus), 0); + pr_debug(" created table: %p\n", ppci->table_group); + } +} + + +static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) +{ + struct device_node *dn; + struct iommu_table *tbl; + + pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); + + dn = dev->dev.of_node; + + /* If we're the direct child of a root bus, then we need to allocate + * an iommu table ourselves. The bus setup code should have setup + * the window sizes already. + */ + if (!dev->bus->self) { + struct pci_controller *phb = PCI_DN(dn)->phb; + + pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); + PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node); + tbl = PCI_DN(dn)->table_group->tables[0]; + iommu_table_setparms(phb, dn, tbl); + + if (!iommu_init_table(tbl, phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + + set_iommu_table_base(&dev->dev, tbl); + return; + } + + /* If this device is further down the bus tree, search upwards until + * an already allocated iommu table is found and use that. + */ + + while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL) + dn = dn->parent; + + if (dn && PCI_DN(dn)) + set_iommu_table_base(&dev->dev, + PCI_DN(dn)->table_group->tables[0]); + else + printk(KERN_WARNING "iommu: Device %s has no iommu table\n", + pci_name(dev)); +} + +static int __read_mostly disable_ddw; + +static int __init disable_ddw_setup(char *str) +{ + disable_ddw = 1; + printk(KERN_INFO "ppc iommu: disabling ddw.\n"); + + return 0; +} + +early_param("disable_ddw", disable_ddw_setup); + +static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp) +{ + int ret; + + ret = tce_clearrange_multi_pSeriesLP(0, + 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); + if (ret) + pr_warn("%pOF failed to clear tces in window.\n", + np); + else + pr_debug("%pOF successfully cleared tces in window.\n", + np); +} + +/* + * Call only if DMA window is clean. + */ +static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn) +{ + int ret; + + ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn); + if (ret) + pr_warn("%pOF: failed to remove DMA window: rtas returned " + "%d to ibm,remove-pe-dma-window(%x) %llx\n", + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); + else + pr_debug("%pOF: successfully removed DMA window: rtas returned " + "%d to ibm,remove-pe-dma-window(%x) %llx\n", + np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn); +} + +static void remove_dma_window(struct device_node *np, u32 *ddw_avail, + struct property *win) +{ + struct dynamic_dma_window_prop *dwp; + u64 liobn; + + dwp = win->value; + liobn = (u64)be32_to_cpu(dwp->liobn); + + clean_dma_window(np, dwp); + __remove_dma_window(np, ddw_avail, liobn); +} + +static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name) +{ + struct property *win; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; + int ret = 0; + + win = of_find_property(np, win_name, NULL); + if (!win) + return -EINVAL; + + ret = of_property_read_u32_array(np, "ibm,ddw-applicable", + &ddw_avail[0], DDW_APPLICABLE_SIZE); + if (ret) + return 0; + + + if (win->length >= sizeof(struct dynamic_dma_window_prop)) + remove_dma_window(np, ddw_avail, win); + + if (!remove_prop) + return 0; + + ret = of_remove_property(np, win); + if (ret) + pr_warn("%pOF: failed to remove DMA window property: %d\n", + np, ret); + return 0; +} + +static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift, + bool *direct_mapping) +{ + struct dma_win *window; + const struct dynamic_dma_window_prop *dma64; + bool found = false; + + spin_lock(&dma_win_list_lock); + /* check if we already created a window and dupe that config if so */ + list_for_each_entry(window, &dma_win_list, list) { + if (window->device == pdn) { + dma64 = window->prop; + *dma_addr = be64_to_cpu(dma64->dma_base); + *window_shift = be32_to_cpu(dma64->window_shift); + *direct_mapping = window->direct; + found = true; + break; + } + } + spin_unlock(&dma_win_list_lock); + + return found; +} + +static struct dma_win *ddw_list_new_entry(struct device_node *pdn, + const struct dynamic_dma_window_prop *dma64) +{ + struct dma_win *window; + + window = kzalloc(sizeof(*window), GFP_KERNEL); + if (!window) + return NULL; + + window->device = pdn; + window->prop = dma64; + window->direct = false; + + return window; +} + +static void find_existing_ddw_windows_named(const char *name) +{ + int len; + struct device_node *pdn; + struct dma_win *window; + const struct dynamic_dma_window_prop *dma64; + + for_each_node_with_property(pdn, name) { + dma64 = of_get_property(pdn, name, &len); + if (!dma64 || len < sizeof(*dma64)) { + remove_ddw(pdn, true, name); + continue; + } + + window = ddw_list_new_entry(pdn, dma64); + if (!window) { + of_node_put(pdn); + break; + } + + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); + } +} + +static int find_existing_ddw_windows(void) +{ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + find_existing_ddw_windows_named(DIRECT64_PROPNAME); + find_existing_ddw_windows_named(DMA64_PROPNAME); + + return 0; +} +machine_arch_initcall(pseries, find_existing_ddw_windows); + +/** + * ddw_read_ext - Get the value of an DDW extension + * @np: device node from which the extension value is to be read. + * @extnum: index number of the extension. + * @value: pointer to return value, modified when extension is available. + * + * Checks if "ibm,ddw-extensions" exists for this node, and get the value + * on index 'extnum'. + * It can be used only to check if a property exists, passing value == NULL. + * + * Returns: + * 0 if extension successfully read + * -EINVAL if the "ibm,ddw-extensions" does not exist, + * -ENODATA if "ibm,ddw-extensions" does not have a value, and + * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension. + */ +static inline int ddw_read_ext(const struct device_node *np, int extnum, + u32 *value) +{ + static const char propname[] = "ibm,ddw-extensions"; + u32 count; + int ret; + + ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count); + if (ret) + return ret; + + if (count < extnum) + return -EOVERFLOW; + + if (!value) + value = &count; + + return of_property_read_u32_index(np, propname, extnum, value); +} + +static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail, + struct ddw_query_response *query, + struct device_node *parent) +{ + struct device_node *dn; + struct pci_dn *pdn; + u32 cfg_addr, ext_query, query_out[5]; + u64 buid; + int ret, out_sz; + + /* + * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many + * output parameters ibm,query-pe-dma-windows will have, ranging from + * 5 to 6. + */ + ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query); + if (!ret && ext_query == 1) + out_sz = 6; + else + out_sz = 5; + + /* + * Get the config address and phb buid of the PE window. + * Rely on eeh to retrieve this for us. + * Retrieve them from the pci device, not the node with the + * dma-window property + */ + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); + + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out, + cfg_addr, BUID_HI(buid), BUID_LO(buid)); + + switch (out_sz) { + case 5: + query->windows_available = query_out[0]; + query->largest_available_block = query_out[1]; + query->page_size = query_out[2]; + query->migration_capable = query_out[3]; + break; + case 6: + query->windows_available = query_out[0]; + query->largest_available_block = ((u64)query_out[1] << 32) | + query_out[2]; + query->page_size = query_out[3]; + query->migration_capable = query_out[4]; + break; + } + + dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n", + ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), ret, query->largest_available_block, + query->page_size, query->windows_available); + + return ret; +} + +static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail, + struct ddw_create_response *create, int page_shift, + int window_shift) +{ + struct device_node *dn; + struct pci_dn *pdn; + u32 cfg_addr; + u64 buid; + int ret; + + /* + * Get the config address and phb buid of the PE window. + * Rely on eeh to retrieve this for us. + * Retrieve them from the pci device, not the node with the + * dma-window property + */ + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8)); + + do { + /* extra outputs are LIOBN and dma-addr (hi, lo) */ + ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4, + (u32 *)create, cfg_addr, BUID_HI(buid), + BUID_LO(buid), page_shift, window_shift); + } while (rtas_busy_delay(ret)); + dev_info(&dev->dev, + "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " + "(liobn = 0x%x starting addr = %x %x)\n", + ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid), + BUID_LO(buid), page_shift, window_shift, ret, create->liobn, + create->addr_hi, create->addr_lo); + + return ret; +} + +struct failed_ddw_pdn { + struct device_node *pdn; + struct list_head list; +}; + +static LIST_HEAD(failed_ddw_pdn_list); + +static phys_addr_t ddw_memory_hotplug_max(void) +{ + resource_size_t max_addr = memory_hotplug_max(); + struct device_node *memory; + + for_each_node_by_type(memory, "memory") { + struct resource res; + + if (of_address_to_resource(memory, 0, &res)) + continue; + + max_addr = max_t(resource_size_t, max_addr, res.end + 1); + } + + return max_addr; +} + +/* + * Platforms supporting the DDW option starting with LoPAR level 2.7 implement + * ibm,ddw-extensions, which carries the rtas token for + * ibm,reset-pe-dma-windows. + * That rtas-call can be used to restore the default DMA window for the device. + */ +static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn) +{ + int ret; + u32 cfg_addr, reset_dma_win; + u64 buid; + struct device_node *dn; + struct pci_dn *pdn; + + ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win); + if (ret) + return; + + dn = pci_device_to_OF_node(dev); + pdn = PCI_DN(dn); + buid = pdn->phb->buid; + cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8); + + ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid), + BUID_LO(buid)); + if (ret) + dev_info(&dev->dev, + "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ", + reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid), + ret); +} + +/* Return largest page shift based on "IO Page Sizes" output of ibm,query-pe-dma-window. */ +static int iommu_get_page_shift(u32 query_page_size) +{ + /* Supported IO page-sizes according to LoPAR, note that 2M is out of order */ + const int shift[] = { + __builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M), + __builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M), + __builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M) + }; + + int i = ARRAY_SIZE(shift) - 1; + int ret = 0; + + /* + * On LoPAR, ibm,query-pe-dma-window outputs "IO Page Sizes" using a bit field: + * - bit 31 means 4k pages are supported, + * - bit 30 means 64k pages are supported, and so on. + * Larger pagesizes map more memory with the same amount of TCEs, so start probing them. + */ + for (; i >= 0 ; i--) { + if (query_page_size & (1 << i)) + ret = max(ret, shift[i]); + } + + return ret; +} + +static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr, + u32 page_shift, u32 window_shift) +{ + struct dynamic_dma_window_prop *ddwprop; + struct property *win64; + + win64 = kzalloc(sizeof(*win64), GFP_KERNEL); + if (!win64) + return NULL; + + win64->name = kstrdup(propname, GFP_KERNEL); + ddwprop = kzalloc(sizeof(*ddwprop), GFP_KERNEL); + win64->value = ddwprop; + win64->length = sizeof(*ddwprop); + if (!win64->name || !win64->value) { + kfree(win64->name); + kfree(win64->value); + kfree(win64); + return NULL; + } + + ddwprop->liobn = cpu_to_be32(liobn); + ddwprop->dma_base = cpu_to_be64(dma_addr); + ddwprop->tce_shift = cpu_to_be32(page_shift); + ddwprop->window_shift = cpu_to_be32(window_shift); + + return win64; +} + +/* + * If the PE supports dynamic dma windows, and there is space for a table + * that can map all pages in a linear offset, then setup such a table, + * and record the dma-offset in the struct device. + * + * dev: the pci device we are checking + * pdn: the parent pe node with the ibm,dma_window property + * Future: also check if we can remap the base window for our base page size + * + * returns true if can map all pages (direct mapping), false otherwise.. + */ +static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) +{ + int len = 0, ret; + int max_ram_len = order_base_2(ddw_memory_hotplug_max()); + struct ddw_query_response query; + struct ddw_create_response create; + int page_shift; + u64 win_addr; + const char *win_name; + struct device_node *dn; + u32 ddw_avail[DDW_APPLICABLE_SIZE]; + struct dma_win *window; + struct property *win64; + struct failed_ddw_pdn *fpdn; + bool default_win_removed = false, direct_mapping = false; + bool pmem_present; + struct pci_dn *pci = PCI_DN(pdn); + struct property *default_win = NULL; + + dn = of_find_node_by_type(NULL, "ibm,pmemory"); + pmem_present = dn != NULL; + of_node_put(dn); + + mutex_lock(&dma_win_init_mutex); + + if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping)) + goto out_unlock; + + /* + * If we already went through this for a previous function of + * the same device and failed, we don't want to muck with the + * DMA window again, as it will race with in-flight operations + * and can lead to EEHs. The above mutex protects access to the + * list. + */ + list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) { + if (fpdn->pdn == pdn) + goto out_unlock; + } + + /* + * the ibm,ddw-applicable property holds the tokens for: + * ibm,query-pe-dma-window + * ibm,create-pe-dma-window + * ibm,remove-pe-dma-window + * for the given node in that order. + * the property is actually in the parent, not the PE + */ + ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable", + &ddw_avail[0], DDW_APPLICABLE_SIZE); + if (ret) + goto out_failed; + + /* + * Query if there is a second window of size to map the + * whole partition. Query returns number of windows, largest + * block assigned to PE (partition endpoint), and two bitmasks + * of page sizes: supported and supported for migrate-dma. + */ + dn = pci_device_to_OF_node(dev); + ret = query_ddw(dev, ddw_avail, &query, pdn); + if (ret != 0) + goto out_failed; + + /* + * If there is no window available, remove the default DMA window, + * if it's present. This will make all the resources available to the + * new DDW window. + * If anything fails after this, we need to restore it, so also check + * for extensions presence. + */ + if (query.windows_available == 0) { + int reset_win_ext; + + /* DDW + IOMMU on single window may fail if there is any allocation */ + if (iommu_table_in_use(pci->table_group->tables[0])) { + dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); + goto out_failed; + } + + default_win = of_find_property(pdn, "ibm,dma-window", NULL); + if (!default_win) + goto out_failed; + + reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL); + if (reset_win_ext) + goto out_failed; + + remove_dma_window(pdn, ddw_avail, default_win); + default_win_removed = true; + + /* Query again, to check if the window is available */ + ret = query_ddw(dev, ddw_avail, &query, pdn); + if (ret != 0) + goto out_failed; + + if (query.windows_available == 0) { + /* no windows are available for this device. */ + dev_dbg(&dev->dev, "no free dynamic windows"); + goto out_failed; + } + } + + page_shift = iommu_get_page_shift(query.page_size); + if (!page_shift) { + dev_dbg(&dev->dev, "no supported page size in mask %x", + query.page_size); + goto out_failed; + } + + + /* + * The "ibm,pmemory" can appear anywhere in the address space. + * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS + * for the upper limit and fallback to max RAM otherwise but this + * disables device::dma_ops_bypass. + */ + len = max_ram_len; + if (pmem_present) { + if (query.largest_available_block >= + (1ULL << (MAX_PHYSMEM_BITS - page_shift))) + len = MAX_PHYSMEM_BITS; + else + dev_info(&dev->dev, "Skipping ibm,pmemory"); + } + + /* check if the available block * number of ptes will map everything */ + if (query.largest_available_block < (1ULL << (len - page_shift))) { + dev_dbg(&dev->dev, + "can't map partition max 0x%llx with %llu %llu-sized pages\n", + 1ULL << len, + query.largest_available_block, + 1ULL << page_shift); + + len = order_base_2(query.largest_available_block << page_shift); + win_name = DMA64_PROPNAME; + } else { + direct_mapping = !default_win_removed || + (len == MAX_PHYSMEM_BITS) || + (!pmem_present && (len == max_ram_len)); + win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME; + } + + ret = create_ddw(dev, ddw_avail, &create, page_shift, len); + if (ret != 0) + goto out_failed; + + dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n", + create.liobn, dn); + + win_addr = ((u64)create.addr_hi << 32) | create.addr_lo; + win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len); + + if (!win64) { + dev_info(&dev->dev, + "couldn't allocate property, property name, or value\n"); + goto out_remove_win; + } + + ret = of_add_property(pdn, win64); + if (ret) { + dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d", + pdn, ret); + goto out_free_prop; + } + + window = ddw_list_new_entry(pdn, win64->value); + if (!window) + goto out_del_prop; + + if (direct_mapping) { + window->direct = true; + + /* DDW maps the whole partition, so enable direct DMA mapping */ + ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, + win64->value, tce_setrange_multi_pSeriesLP_walk); + if (ret) { + dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n", + dn, ret); + + /* Make sure to clean DDW if any TCE was set*/ + clean_dma_window(pdn, win64->value); + goto out_del_list; + } + } else { + struct iommu_table *newtbl; + int i; + unsigned long start = 0, end = 0; + + window->direct = false; + + for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) { + const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM; + + /* Look for MMIO32 */ + if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) { + start = pci->phb->mem_resources[i].start; + end = pci->phb->mem_resources[i].end; + break; + } + } + + /* New table for using DDW instead of the default DMA window */ + newtbl = iommu_pseries_alloc_table(pci->phb->node); + if (!newtbl) { + dev_dbg(&dev->dev, "couldn't create new IOMMU table\n"); + goto out_del_list; + } + + iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr, + 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); + iommu_init_table(newtbl, pci->phb->node, start, end); + + pci->table_group->tables[1] = newtbl; + + set_iommu_table_base(&dev->dev, newtbl); + } + + if (default_win_removed) { + iommu_tce_table_put(pci->table_group->tables[0]); + pci->table_group->tables[0] = NULL; + + /* default_win is valid here because default_win_removed == true */ + of_remove_property(pdn, default_win); + dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); + } + + spin_lock(&dma_win_list_lock); + list_add(&window->list, &dma_win_list); + spin_unlock(&dma_win_list_lock); + + dev->dev.archdata.dma_offset = win_addr; + goto out_unlock; + +out_del_list: + kfree(window); + +out_del_prop: + of_remove_property(pdn, win64); + +out_free_prop: + kfree(win64->name); + kfree(win64->value); + kfree(win64); + +out_remove_win: + /* DDW is clean, so it's ok to call this directly. */ + __remove_dma_window(pdn, ddw_avail, create.liobn); + +out_failed: + if (default_win_removed) + reset_dma_window(dev, pdn); + + fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL); + if (!fpdn) + goto out_unlock; + fpdn->pdn = pdn; + list_add(&fpdn->list, &failed_ddw_pdn_list); + +out_unlock: + mutex_unlock(&dma_win_init_mutex); + + /* + * If we have persistent memory and the window size is only as big + * as RAM, then we failed to create a window to cover persistent + * memory and need to set the DMA limit. + */ + if (pmem_present && direct_mapping && len == max_ram_len) + dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len); + + return direct_mapping; +} + +static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) +{ + struct device_node *pdn, *dn; + struct iommu_table *tbl; + const __be32 *dma_window = NULL; + struct pci_dn *pci; + + pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); + + /* dev setup for LPAR is a little tricky, since the device tree might + * contain the dma-window properties per-device and not necessarily + * for the bus. So we need to search upwards in the tree until we + * either hit a dma-window property, OR find a parent with a table + * already allocated. + */ + dn = pci_device_to_OF_node(dev); + pr_debug(" node is %pOF\n", dn); + + pdn = pci_dma_find(dn, &dma_window); + if (!pdn || !PCI_DN(pdn)) { + printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " + "no DMA window found for pci dev=%s dn=%pOF\n", + pci_name(dev), dn); + return; + } + pr_debug(" parent is %pOF\n", pdn); + + pci = PCI_DN(pdn); + if (!pci->table_group) { + pci->table_group = iommu_pseries_alloc_group(pci->phb->node); + tbl = pci->table_group->tables[0]; + iommu_table_setparms_lpar(pci->phb, pdn, tbl, + pci->table_group, dma_window); + + iommu_init_table(tbl, pci->phb->node, 0, 0); + iommu_register_group(pci->table_group, + pci_domain_nr(pci->phb->bus), 0); + pr_debug(" created table: %p\n", pci->table_group); + } else { + pr_debug(" found DMA window, table: %p\n", pci->table_group); + } + + set_iommu_table_base(&dev->dev, pci->table_group->tables[0]); + iommu_add_device(pci->table_group, &dev->dev); +} + +static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) +{ + struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; + + /* only attempt to use a new window if 64-bit DMA is requested */ + if (dma_mask < DMA_BIT_MASK(64)) + return false; + + dev_dbg(&pdev->dev, "node is %pOF\n", dn); + + /* + * the device tree might contain the dma-window properties + * per-device and not necessarily for the bus. So we need to + * search upwards in the tree until we either hit a dma-window + * property, OR find a parent with a table already allocated. + */ + pdn = pci_dma_find(dn, NULL); + if (pdn && PCI_DN(pdn)) + return enable_ddw(pdev, pdn); + + return false; +} + +static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct dma_win *window; + struct memory_notify *arg = data; + int ret = 0; + + switch (action) { + case MEM_GOING_ONLINE: + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { + if (window->direct) { + ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + } + /* XXX log error */ + } + spin_unlock(&dma_win_list_lock); + break; + case MEM_CANCEL_ONLINE: + case MEM_OFFLINE: + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { + if (window->direct) { + ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, + arg->nr_pages, window->prop); + } + /* XXX log error */ + } + spin_unlock(&dma_win_list_lock); + break; + default: + break; + } + if (ret && action != MEM_CANCEL_ONLINE) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static struct notifier_block iommu_mem_nb = { + .notifier_call = iommu_mem_notifier, +}; + +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + int err = NOTIFY_OK; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; + struct pci_dn *pci = PCI_DN(np); + struct dma_win *window; + + switch (action) { + case OF_RECONFIG_DETACH_NODE: + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. + */ + if (remove_ddw(np, false, DIRECT64_PROPNAME)) + remove_ddw(np, false, DMA64_PROPNAME); + + if (pci && pci->table_group) + iommu_pseries_free_group(pci->table_group, + np->full_name); + + spin_lock(&dma_win_list_lock); + list_for_each_entry(window, &dma_win_list, list) { + if (window->device == np) { + list_del(&window->list); + kfree(window); + break; + } + } + spin_unlock(&dma_win_list_lock); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block iommu_reconfig_nb = { + .notifier_call = iommu_reconfig_notifier, +}; + +/* These are called very early. */ +void __init iommu_init_early_pSeries(void) +{ + if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) + return; + + if (firmware_has_feature(FW_FEATURE_LPAR)) { + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP; + if (!disable_ddw) + pseries_pci_controller_ops.iommu_bypass_supported = + iommu_bypass_supported_pSeriesLP; + } else { + pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries; + pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries; + } + + + of_reconfig_notifier_register(&iommu_reconfig_nb); + register_memory_notifier(&iommu_mem_nb); + + set_pci_dma_ops(&dma_iommu_ops); +} + +static int __init disable_multitce(char *str) +{ + if (strcmp(str, "off") == 0 && + firmware_has_feature(FW_FEATURE_LPAR) && + (firmware_has_feature(FW_FEATURE_PUT_TCE_IND) || + firmware_has_feature(FW_FEATURE_STUFF_TCE))) { + printk(KERN_INFO "Disabling MULTITCE firmware feature\n"); + powerpc_firmware_features &= + ~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE); + } + return 1; +} + +__setup("multitce=", disable_multitce); + +#ifdef CONFIG_SPAPR_TCE_IOMMU +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, + struct pci_dev *pdev) +{ + struct device_node *pdn, *dn = pdev->dev.of_node; + struct iommu_group *grp; + struct pci_dn *pci; + + pdn = pci_dma_find(dn, NULL); + if (!pdn || !PCI_DN(pdn)) + return ERR_PTR(-ENODEV); + + pci = PCI_DN(pdn); + if (!pci->table_group) + return ERR_PTR(-ENODEV); + + grp = pci->table_group->group; + if (!grp) + return ERR_PTR(-ENODEV); + + return iommu_group_ref_get(grp); +} +#endif diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c new file mode 100644 index 0000000000..096d09ed89 --- /dev/null +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2006 Michael Ellerman, IBM Corporation + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +void pseries_kexec_cpu_down(int crash_shutdown, int secondary) +{ + /* + * Don't risk a hypervisor call if we're crashing + * XXX: Why? The hypervisor is not crashing. It might be better + * to at least attempt unregister to avoid the hypervisor stepping + * on our memory. + */ + if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { + int ret; + int cpu = smp_processor_id(); + int hwcpu = hard_smp_processor_id(); + + if (get_lppaca()->dtl_enable_mask) { + ret = unregister_dtl(hwcpu); + if (ret) { + pr_err("WARNING: DTL deregistration for cpu " + "%d (hw %d) failed with %d\n", + cpu, hwcpu, ret); + } + } + + ret = unregister_slb_shadow(hwcpu); + if (ret) { + pr_err("WARNING: SLB shadow buffer deregistration " + "for cpu %d (hw %d) failed with %d\n", + cpu, hwcpu, ret); + } + + ret = unregister_vpa(hwcpu); + if (ret) { + pr_err("WARNING: VPA deregistration for cpu %d " + "(hw %d) failed with %d\n", cpu, hwcpu, ret); + } + } + + if (xive_enabled()) { + xive_teardown_cpu(); + + if (!secondary) + xive_shutdown(); + } else + xics_kexec_teardown_cpu(secondary); +} + +void pseries_machine_kexec(struct kimage *image) +{ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + pseries_disable_reloc_on_exc(); + + default_machine_kexec(image); +} diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c new file mode 100644 index 0000000000..d4d6de0628 --- /dev/null +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -0,0 +1,2026 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * pSeries_lpar.c + * Copyright (C) 2001 Todd Inglett, IBM Corporation + * + * pSeries LPAR support. + */ + +/* Enables debugging of low-level hash table routines - careful! */ +#undef DEBUG +#define pr_fmt(fmt) "lpar: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +/* Flag bits for H_BULK_REMOVE */ +#define HBR_REQUEST 0x4000000000000000UL +#define HBR_RESPONSE 0x8000000000000000UL +#define HBR_END 0xc000000000000000UL +#define HBR_AVPN 0x0200000000000000UL +#define HBR_ANDCOND 0x0100000000000000UL + + +/* in hvCall.S */ +EXPORT_SYMBOL(plpar_hcall); +EXPORT_SYMBOL(plpar_hcall9); +EXPORT_SYMBOL(plpar_hcall_norets); + +#ifdef CONFIG_PPC_64S_HASH_MMU +/* + * H_BLOCK_REMOVE supported block size for this page size in segment who's base + * page size is that page size. + * + * The first index is the segment base page size, the second one is the actual + * page size. + */ +static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init; +#endif + +/* + * Due to the involved complexity, and that the current hypervisor is only + * returning this value or 0, we are limiting the support of the H_BLOCK_REMOVE + * buffer size to 8 size block. + */ +#define HBLKRM_SUPPORTED_BLOCK_SIZE 8 + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static u8 dtl_mask = DTL_LOG_PREEMPT; +#else +static u8 dtl_mask; +#endif + +void alloc_dtl_buffers(unsigned long *time_limit) +{ + int cpu; + struct paca_struct *pp; + struct dtl_entry *dtl; + + for_each_possible_cpu(cpu) { + pp = paca_ptrs[cpu]; + if (pp->dispatch_log) + continue; + dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + pr_warn("Stolen time statistics will be unreliable\n"); +#endif + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + + if (time_limit && time_after(jiffies, *time_limit)) { + cond_resched(); + *time_limit = jiffies + HZ; + } + } +} + +void register_dtl_buffer(int cpu) +{ + long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + int hwcpu = get_hard_smp_processor_id(cpu); + + pp = paca_ptrs[cpu]; + dtl = pp->dispatch_log; + if (dtl && dtl_mask) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n", + cpu, hwcpu, ret); + + lppaca_of(cpu).dtl_enable_mask = dtl_mask; + } +} + +#ifdef CONFIG_PPC_SPLPAR +struct dtl_worker { + struct delayed_work work; + int cpu; +}; + +struct vcpu_dispatch_data { + int last_disp_cpu; + + int total_disp; + + int same_cpu_disp; + int same_chip_disp; + int diff_chip_disp; + int far_chip_disp; + + int numa_home_disp; + int numa_remote_disp; + int numa_far_disp; +}; + +/* + * This represents the number of cpus in the hypervisor. Since there is no + * architected way to discover the number of processors in the host, we + * provision for dealing with NR_CPUS. This is currently 2048 by default, and + * is sufficient for our purposes. This will need to be tweaked if + * CONFIG_NR_CPUS is changed. + */ +#define NR_CPUS_H NR_CPUS + +DEFINE_RWLOCK(dtl_access_lock); +static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data); +static DEFINE_PER_CPU(u64, dtl_entry_ridx); +static DEFINE_PER_CPU(struct dtl_worker, dtl_workers); +static enum cpuhp_state dtl_worker_state; +static DEFINE_MUTEX(dtl_enable_mutex); +static int vcpudispatch_stats_on __read_mostly; +static int vcpudispatch_stats_freq = 50; +static __be32 *vcpu_associativity, *pcpu_associativity; + + +static void free_dtl_buffers(unsigned long *time_limit) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + int cpu; + struct paca_struct *pp; + + for_each_possible_cpu(cpu) { + pp = paca_ptrs[cpu]; + if (!pp->dispatch_log) + continue; + kmem_cache_free(dtl_cache, pp->dispatch_log); + pp->dtl_ridx = 0; + pp->dispatch_log = 0; + pp->dispatch_log_end = 0; + pp->dtl_curr = 0; + + if (time_limit && time_after(jiffies, *time_limit)) { + cond_resched(); + *time_limit = jiffies + HZ; + } + } +#endif +} + +static int init_cpu_associativity(void) +{ + vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core, + VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL); + pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core, + VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL); + + if (!vcpu_associativity || !pcpu_associativity) { + pr_err("error allocating memory for associativity information\n"); + return -ENOMEM; + } + + return 0; +} + +static void destroy_cpu_associativity(void) +{ + kfree(vcpu_associativity); + kfree(pcpu_associativity); + vcpu_associativity = pcpu_associativity = 0; +} + +static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag) +{ + __be32 *assoc; + int rc = 0; + + assoc = &cpu_assoc[(int)(cpu / threads_per_core) * VPHN_ASSOC_BUFSIZE]; + if (!assoc[0]) { + rc = hcall_vphn(cpu, flag, &assoc[0]); + if (rc) + return NULL; + } + + return assoc; +} + +static __be32 *get_pcpu_associativity(int cpu) +{ + return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU); +} + +static __be32 *get_vcpu_associativity(int cpu) +{ + return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU); +} + +static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu) +{ + __be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc; + + if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H) + return -EINVAL; + + last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu); + cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu); + + if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc) + return -EIO; + + return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc); +} + +static int cpu_home_node_dispatch_distance(int disp_cpu) +{ + __be32 *disp_cpu_assoc, *vcpu_assoc; + int vcpu_id = smp_processor_id(); + + if (disp_cpu >= NR_CPUS_H) { + pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n", + disp_cpu, NR_CPUS_H); + return -EINVAL; + } + + disp_cpu_assoc = get_pcpu_associativity(disp_cpu); + vcpu_assoc = get_vcpu_associativity(vcpu_id); + + if (!disp_cpu_assoc || !vcpu_assoc) + return -EIO; + + return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc); +} + +static void update_vcpu_disp_stat(int disp_cpu) +{ + struct vcpu_dispatch_data *disp; + int distance; + + disp = this_cpu_ptr(&vcpu_disp_data); + if (disp->last_disp_cpu == -1) { + disp->last_disp_cpu = disp_cpu; + return; + } + + disp->total_disp++; + + if (disp->last_disp_cpu == disp_cpu || + (cpu_first_thread_sibling(disp->last_disp_cpu) == + cpu_first_thread_sibling(disp_cpu))) + disp->same_cpu_disp++; + else { + distance = cpu_relative_dispatch_distance(disp->last_disp_cpu, + disp_cpu); + if (distance < 0) + pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n", + smp_processor_id()); + else { + switch (distance) { + case 0: + disp->same_chip_disp++; + break; + case 1: + disp->diff_chip_disp++; + break; + case 2: + disp->far_chip_disp++; + break; + default: + pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n", + smp_processor_id(), + disp->last_disp_cpu, + disp_cpu, + distance); + } + } + } + + distance = cpu_home_node_dispatch_distance(disp_cpu); + if (distance < 0) + pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n", + smp_processor_id()); + else { + switch (distance) { + case 0: + disp->numa_home_disp++; + break; + case 1: + disp->numa_remote_disp++; + break; + case 2: + disp->numa_far_disp++; + break; + default: + pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n", + smp_processor_id(), + disp_cpu, + distance); + } + } + + disp->last_disp_cpu = disp_cpu; +} + +static void process_dtl_buffer(struct work_struct *work) +{ + struct dtl_entry dtle; + u64 i = __this_cpu_read(dtl_entry_ridx); + struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + struct dtl_worker *d = container_of(work, struct dtl_worker, work.work); + + if (!local_paca->dispatch_log) + return; + + /* if we have been migrated away, we cancel ourself */ + if (d->cpu != smp_processor_id()) { + pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n", + smp_processor_id()); + return; + } + + if (i == be64_to_cpu(vpa->dtl_idx)) + goto out; + + while (i < be64_to_cpu(vpa->dtl_idx)) { + dtle = *dtl; + barrier(); + if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { + /* buffer has overflowed */ + pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n", + d->cpu, + be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i); + i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id)); + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + + __this_cpu_write(dtl_entry_ridx, i); + +out: + schedule_delayed_work_on(d->cpu, to_delayed_work(work), + HZ / vcpudispatch_stats_freq); +} + +static int dtl_worker_online(unsigned int cpu) +{ + struct dtl_worker *d = &per_cpu(dtl_workers, cpu); + + memset(d, 0, sizeof(*d)); + INIT_DELAYED_WORK(&d->work, process_dtl_buffer); + d->cpu = cpu; + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + per_cpu(dtl_entry_ridx, cpu) = 0; + register_dtl_buffer(cpu); +#else + per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx); +#endif + + schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq); + return 0; +} + +static int dtl_worker_offline(unsigned int cpu) +{ + struct dtl_worker *d = &per_cpu(dtl_workers, cpu); + + cancel_delayed_work_sync(&d->work); + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + unregister_dtl(get_hard_smp_processor_id(cpu)); +#endif + + return 0; +} + +static void set_global_dtl_mask(u8 mask) +{ + int cpu; + + dtl_mask = mask; + for_each_present_cpu(cpu) + lppaca_of(cpu).dtl_enable_mask = dtl_mask; +} + +static void reset_global_dtl_mask(void) +{ + int cpu; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + dtl_mask = DTL_LOG_PREEMPT; +#else + dtl_mask = 0; +#endif + for_each_present_cpu(cpu) + lppaca_of(cpu).dtl_enable_mask = dtl_mask; +} + +static int dtl_worker_enable(unsigned long *time_limit) +{ + int rc = 0, state; + + if (!write_trylock(&dtl_access_lock)) { + rc = -EBUSY; + goto out; + } + + set_global_dtl_mask(DTL_LOG_ALL); + + /* Setup dtl buffers and register those */ + alloc_dtl_buffers(time_limit); + + state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online", + dtl_worker_online, dtl_worker_offline); + if (state < 0) { + pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n"); + free_dtl_buffers(time_limit); + reset_global_dtl_mask(); + write_unlock(&dtl_access_lock); + rc = -EINVAL; + goto out; + } + dtl_worker_state = state; + +out: + return rc; +} + +static void dtl_worker_disable(unsigned long *time_limit) +{ + cpuhp_remove_state(dtl_worker_state); + free_dtl_buffers(time_limit); + reset_global_dtl_mask(); + write_unlock(&dtl_access_lock); +} + +static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p, + size_t count, loff_t *ppos) +{ + unsigned long time_limit = jiffies + HZ; + struct vcpu_dispatch_data *disp; + int rc, cmd, cpu; + char buf[16]; + + if (count > 15) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + rc = kstrtoint(buf, 0, &cmd); + if (rc || cmd < 0 || cmd > 1) { + pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n"); + return rc ? rc : -EINVAL; + } + + mutex_lock(&dtl_enable_mutex); + + if ((cmd == 0 && !vcpudispatch_stats_on) || + (cmd == 1 && vcpudispatch_stats_on)) + goto out; + + if (cmd) { + rc = init_cpu_associativity(); + if (rc) { + destroy_cpu_associativity(); + goto out; + } + + for_each_possible_cpu(cpu) { + disp = per_cpu_ptr(&vcpu_disp_data, cpu); + memset(disp, 0, sizeof(*disp)); + disp->last_disp_cpu = -1; + } + + rc = dtl_worker_enable(&time_limit); + if (rc) { + destroy_cpu_associativity(); + goto out; + } + } else { + dtl_worker_disable(&time_limit); + destroy_cpu_associativity(); + } + + vcpudispatch_stats_on = cmd; + +out: + mutex_unlock(&dtl_enable_mutex); + if (rc) + return rc; + return count; +} + +static int vcpudispatch_stats_display(struct seq_file *p, void *v) +{ + int cpu; + struct vcpu_dispatch_data *disp; + + if (!vcpudispatch_stats_on) { + seq_puts(p, "off\n"); + return 0; + } + + for_each_online_cpu(cpu) { + disp = per_cpu_ptr(&vcpu_disp_data, cpu); + seq_printf(p, "cpu%d", cpu); + seq_put_decimal_ull(p, " ", disp->total_disp); + seq_put_decimal_ull(p, " ", disp->same_cpu_disp); + seq_put_decimal_ull(p, " ", disp->same_chip_disp); + seq_put_decimal_ull(p, " ", disp->diff_chip_disp); + seq_put_decimal_ull(p, " ", disp->far_chip_disp); + seq_put_decimal_ull(p, " ", disp->numa_home_disp); + seq_put_decimal_ull(p, " ", disp->numa_remote_disp); + seq_put_decimal_ull(p, " ", disp->numa_far_disp); + seq_puts(p, "\n"); + } + + return 0; +} + +static int vcpudispatch_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, vcpudispatch_stats_display, NULL); +} + +static const struct proc_ops vcpudispatch_stats_proc_ops = { + .proc_open = vcpudispatch_stats_open, + .proc_read = seq_read, + .proc_write = vcpudispatch_stats_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static ssize_t vcpudispatch_stats_freq_write(struct file *file, + const char __user *p, size_t count, loff_t *ppos) +{ + int rc, freq; + char buf[16]; + + if (count > 15) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + rc = kstrtoint(buf, 0, &freq); + if (rc || freq < 1 || freq > HZ) { + pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n", + HZ); + return rc ? rc : -EINVAL; + } + + vcpudispatch_stats_freq = freq; + + return count; +} + +static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v) +{ + seq_printf(p, "%d\n", vcpudispatch_stats_freq); + return 0; +} + +static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file) +{ + return single_open(file, vcpudispatch_stats_freq_display, NULL); +} + +static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { + .proc_open = vcpudispatch_stats_freq_open, + .proc_read = seq_read, + .proc_write = vcpudispatch_stats_freq_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static int __init vcpudispatch_stats_procfs_init(void) +{ + if (!lppaca_shared_proc()) + return 0; + + if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL, + &vcpudispatch_stats_proc_ops)) + pr_err("vcpudispatch_stats: error creating procfs file\n"); + else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL, + &vcpudispatch_stats_freq_proc_ops)) + pr_err("vcpudispatch_stats_freq: error creating procfs file\n"); + + return 0; +} + +machine_device_initcall(pseries, vcpudispatch_stats_procfs_init); + +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING +u64 pseries_paravirt_steal_clock(int cpu) +{ + struct lppaca *lppaca = &lppaca_of(cpu); + + return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) + + be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb)); +} +#endif + +#endif /* CONFIG_PPC_SPLPAR */ + +void vpa_init(int cpu) +{ + int hwcpu = get_hard_smp_processor_id(cpu); + unsigned long addr; + long ret; + + /* + * The spec says it "may be problematic" if CPU x registers the VPA of + * CPU y. We should never do that, but wail if we ever do. + */ + WARN_ON(cpu != smp_processor_id()); + + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + lppaca_of(cpu).vmxregs_in_use = 1; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lppaca_of(cpu).ebb_regs_in_use = 1; + + addr = __pa(&lppaca_of(cpu)); + ret = register_vpa(hwcpu, addr); + + if (ret) { + pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " + "%lx failed with %ld\n", cpu, hwcpu, addr, ret); + return; + } + +#ifdef CONFIG_PPC_64S_HASH_MMU + /* + * PAPR says this feature is SLB-Buffer but firmware never + * reports that. All SPLPAR support SLB shadow buffer. + */ + if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) { + addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr); + ret = register_slb_shadow(hwcpu, addr); + if (ret) + pr_err("WARNING: SLB shadow buffer registration for " + "cpu %d (hw %d) of area %lx failed with %ld\n", + cpu, hwcpu, addr, ret); + } +#endif /* CONFIG_PPC_64S_HASH_MMU */ + + /* + * Register dispatch trace log, if one has been allocated. + */ + register_dtl_buffer(cpu); +} + +#ifdef CONFIG_PPC_BOOK3S_64 + +static int __init pseries_lpar_register_process_table(unsigned long base, + unsigned long page_size, unsigned long table_size) +{ + long rc; + unsigned long flags = 0; + + if (table_size) + flags |= PROC_TABLE_NEW; + if (radix_enabled()) { + flags |= PROC_TABLE_RADIX; + if (mmu_has_feature(MMU_FTR_GTSE)) + flags |= PROC_TABLE_GTSE; + } else + flags |= PROC_TABLE_HPT_SLB; + for (;;) { + rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, + page_size, table_size); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc != H_SUCCESS) { + pr_err("Failed to register process table (rc=%ld)\n", rc); + BUG(); + } + return rc; +} + +#ifdef CONFIG_PPC_64S_HASH_MMU + +static long pSeries_lpar_hpte_insert(unsigned long hpte_group, + unsigned long vpn, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize, int apsize, int ssize) +{ + unsigned long lpar_rc; + unsigned long flags; + unsigned long slot; + unsigned long hpte_v, hpte_r; + + if (!(vflags & HPTE_V_BOLTED)) + pr_devel("hpte_insert(group=%lx, vpn=%016lx, " + "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, vpn, pa, rflags, vflags, psize); + + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + + if (!(vflags & HPTE_V_BOLTED)) + pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + + /* Now fill in the actual HPTE */ + /* Set CEC cookie to 0 */ + /* Zero page = 0 */ + /* I-cache Invalidate = 0 */ + /* I-cache synchronize = 0 */ + /* Exact = 0 */ + flags = 0; + + if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) + flags |= H_COALESCE_CAND; + + lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); + if (unlikely(lpar_rc == H_PTEG_FULL)) { + pr_devel("Hash table group is full\n"); + return -1; + } + + /* + * Since we try and ioremap PHBs we don't own, the pte insert + * will fail. However we must catch the failure in hash_page + * or we will loop forever, so return -2 in this case. + */ + if (unlikely(lpar_rc != H_SUCCESS)) { + pr_err("Failed hash pte insert with error %ld\n", lpar_rc); + return -2; + } + if (!(vflags & HPTE_V_BOLTED)) + pr_devel(" -> slot: %lu\n", slot & 7); + + /* Because of iSeries, we have to pass down the secondary + * bucket bit here as well + */ + return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); +} + +static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); + +static long pSeries_lpar_hpte_remove(unsigned long hpte_group) +{ + unsigned long slot_offset; + unsigned long lpar_rc; + int i; + unsigned long dummy1, dummy2; + + /* pick a random slot to start at */ + slot_offset = mftb() & 0x7; + + for (i = 0; i < HPTES_PER_GROUP; i++) { + + /* don't remove a bolted entry */ + lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, + HPTE_V_BOLTED, &dummy1, &dummy2); + if (lpar_rc == H_SUCCESS) + return i; + + /* + * The test for adjunct partition is performed before the + * ANDCOND test. H_RESOURCE may be returned, so we need to + * check for that as well. + */ + BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); + + slot_offset++; + slot_offset &= 0x7; + } + + return -1; +} + +/* Called during kexec sequence with MMU off */ +static notrace void manual_hpte_clear_all(void) +{ + unsigned long size_bytes = 1UL << ppc64_pft_size; + unsigned long hpte_count = size_bytes >> 4; + struct { + unsigned long pteh; + unsigned long ptel; + } ptes[4]; + long lpar_rc; + unsigned long i, j; + + /* Read in batches of 4, + * invalidate only valid entries not in the VRMA + * hpte_count will be a multiple of 4 + */ + for (i = 0; i < hpte_count; i += 4) { + lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); + if (lpar_rc != H_SUCCESS) { + pr_info("Failed to read hash page table at %ld err %ld\n", + i, lpar_rc); + continue; + } + for (j = 0; j < 4; j++){ + if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == + HPTE_V_VRMA_MASK) + continue; + if (ptes[j].pteh & HPTE_V_VALID) + plpar_pte_remove_raw(0, i + j, 0, + &(ptes[j].pteh), &(ptes[j].ptel)); + } + } +} + +/* Called during kexec sequence with MMU off */ +static notrace int hcall_hpte_clear_all(void) +{ + int rc; + + do { + rc = plpar_hcall_norets(H_CLEAR_HPT); + } while (rc == H_CONTINUE); + + return rc; +} + +/* Called during kexec sequence with MMU off */ +static notrace void pseries_hpte_clear_all(void) +{ + int rc; + + rc = hcall_hpte_clear_all(); + if (rc != H_SUCCESS) + manual_hpte_clear_all(); + +#ifdef __LITTLE_ENDIAN__ + /* + * Reset exceptions to big endian. + * + * FIXME this is a hack for kexec, we need to reset the exception + * endian before starting the new kernel and this is a convenient place + * to do it. + * + * This is also called on boot when a fadump happens. In that case we + * must not change the exception endian mode. + */ + if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) + pseries_big_endian_exceptions(); +#endif +} + +/* + * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and + * the low 3 bits of flags happen to line up. So no transform is needed. + * We can probably optimize here and assume the high bits of newpp are + * already zero. For now I am paranoid. + */ +static long pSeries_lpar_hpte_updatepp(unsigned long slot, + unsigned long newpp, + unsigned long vpn, + int psize, int apsize, + int ssize, unsigned long inv_flags) +{ + unsigned long lpar_rc; + unsigned long flags; + unsigned long want_v; + + want_v = hpte_encode_avpn(vpn, psize, ssize); + + flags = (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO)) | H_AVPN; + flags |= (newpp & HPTE_R_KEY_HI) >> 48; + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) + /* Move pp0 into bit 8 (IBM 55) */ + flags |= (newpp & HPTE_R_PP0) >> 55; + + pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", + want_v, slot, flags, psize); + + lpar_rc = plpar_pte_protect(flags, slot, want_v); + + if (lpar_rc == H_NOT_FOUND) { + pr_devel("not found !\n"); + return -1; + } + + pr_devel("ok\n"); + + BUG_ON(lpar_rc != H_SUCCESS); + + return 0; +} + +static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group) +{ + long lpar_rc; + unsigned long i, j; + struct { + unsigned long pteh; + unsigned long ptel; + } ptes[4]; + + for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { + + lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); + if (lpar_rc != H_SUCCESS) { + pr_info("Failed to read hash page table at %ld err %ld\n", + hpte_group, lpar_rc); + continue; + } + + for (j = 0; j < 4; j++) { + if (HPTE_V_COMPARE(ptes[j].pteh, want_v) && + (ptes[j].pteh & HPTE_V_VALID)) + return i + j; + } + } + + return -1; +} + +static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) +{ + long slot; + unsigned long hash; + unsigned long want_v; + unsigned long hpte_group; + + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); + + /* + * We try to keep bolted entries always in primary hash + * But in some case we can find them in secondary too. + */ + hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __pSeries_lpar_hpte_find(want_v, hpte_group); + if (slot < 0) { + /* Try in secondary */ + hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP; + slot = __pSeries_lpar_hpte_find(want_v, hpte_group); + if (slot < 0) + return -1; + } + return hpte_group + slot; +} + +static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, + unsigned long ea, + int psize, int ssize) +{ + unsigned long vpn; + unsigned long lpar_rc, slot, vsid, flags; + + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); + BUG_ON(slot == -1); + + flags = newpp & (HPTE_R_PP | HPTE_R_N); + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) + /* Move pp0 into bit 8 (IBM 55) */ + flags |= (newpp & HPTE_R_PP0) >> 55; + + flags |= ((newpp & HPTE_R_KEY_HI) >> 48) | (newpp & HPTE_R_KEY_LO); + + lpar_rc = plpar_pte_protect(flags, slot, 0); + + BUG_ON(lpar_rc != H_SUCCESS); +} + +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, + int psize, int apsize, + int ssize, int local) +{ + unsigned long want_v; + unsigned long lpar_rc; + unsigned long dummy1, dummy2; + + pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", + slot, vpn, psize, local); + + want_v = hpte_encode_avpn(vpn, psize, ssize); + lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); + if (lpar_rc == H_NOT_FOUND) + return; + + BUG_ON(lpar_rc != H_SUCCESS); +} + + +/* + * As defined in the PAPR's section 14.5.4.1.8 + * The control mask doesn't include the returned reference and change bit from + * the processed PTE. + */ +#define HBLKR_AVPN 0x0100000000000000UL +#define HBLKR_CTRL_MASK 0xf800000000000000UL +#define HBLKR_CTRL_SUCCESS 0x8000000000000000UL +#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL +#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL + +/* + * Returned true if we are supporting this block size for the specified segment + * base page size and actual page size. + * + * Currently, we only support 8 size block. + */ +static inline bool is_supported_hlbkrm(int bpsize, int psize) +{ + return (hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE); +} + +/** + * H_BLOCK_REMOVE caller. + * @idx should point to the latest @param entry set with a PTEX. + * If PTE cannot be processed because another CPUs has already locked that + * group, those entries are put back in @param starting at index 1. + * If entries has to be retried and @retry_busy is set to true, these entries + * are retried until success. If @retry_busy is set to false, the returned + * is the number of entries yet to process. + */ +static unsigned long call_block_remove(unsigned long idx, unsigned long *param, + bool retry_busy) +{ + unsigned long i, rc, new_idx; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + if (idx < 2) { + pr_warn("Unexpected empty call to H_BLOCK_REMOVE"); + return 0; + } +again: + new_idx = 0; + if (idx > PLPAR_HCALL9_BUFSIZE) { + pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx); + idx = PLPAR_HCALL9_BUFSIZE; + } else if (idx < PLPAR_HCALL9_BUFSIZE) + param[idx] = HBR_END; + + rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf, + param[0], /* AVA */ + param[1], param[2], param[3], param[4], /* TS0-7 */ + param[5], param[6], param[7], param[8]); + if (rc == H_SUCCESS) + return 0; + + BUG_ON(rc != H_PARTIAL); + + /* Check that the unprocessed entries were 'not found' or 'busy' */ + for (i = 0; i < idx-1; i++) { + unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK; + + if (ctrl == HBLKR_CTRL_ERRBUSY) { + param[++new_idx] = param[i+1]; + continue; + } + + BUG_ON(ctrl != HBLKR_CTRL_SUCCESS + && ctrl != HBLKR_CTRL_ERRNOTFOUND); + } + + /* + * If there were entries found busy, retry these entries if requested, + * of if all the entries have to be retried. + */ + if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) { + idx = new_idx + 1; + goto again; + } + + return new_idx; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need + * to make sure that we avoid bouncing the hypervisor tlbie lock. + */ +#define PPC64_HUGE_HPTE_BATCH 12 + +static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn, + int count, int psize, int ssize) +{ + unsigned long param[PLPAR_HCALL9_BUFSIZE]; + unsigned long shift, current_vpgb, vpgb; + int i, pix = 0; + + shift = mmu_psize_defs[psize].shift; + + for (i = 0; i < count; i++) { + /* + * Shifting 3 bits more on the right to get a + * 8 pages aligned virtual addresse. + */ + vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3)); + if (!pix || vpgb != current_vpgb) { + /* + * Need to start a new 8 pages block, flush + * the current one if needed. + */ + if (pix) + (void)call_block_remove(pix, param, true); + current_vpgb = vpgb; + param[0] = hpte_encode_avpn(vpn[i], psize, ssize); + pix = 1; + } + + param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i]; + if (pix == PLPAR_HCALL9_BUFSIZE) { + pix = call_block_remove(pix, param, false); + /* + * pix = 0 means that all the entries were + * removed, we can start a new block. + * Otherwise, this means that there are entries + * to retry, and pix points to latest one, so + * we should increment it and try to continue + * the same block. + */ + if (pix) + pix++; + } + } + if (pix) + (void)call_block_remove(pix, param, true); +} + +static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn, + int count, int psize, int ssize) +{ + unsigned long param[PLPAR_HCALL9_BUFSIZE]; + int i = 0, pix = 0, rc; + + for (i = 0; i < count; i++) { + + if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0, + ssize, 0); + } else { + param[pix] = HBR_REQUEST | HBR_AVPN | slot[i]; + param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize); + pix += 2; + if (pix == 8) { + rc = plpar_hcall9(H_BULK_REMOVE, param, + param[0], param[1], param[2], + param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + pix = 0; + } + } + } + if (pix) { + param[pix] = HBR_END; + rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], + param[2], param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + } +} + +static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, + unsigned long *vpn, + int count, int psize, + int ssize) +{ + unsigned long flags = 0; + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + /* Assuming THP size is 16M */ + if (is_supported_hlbkrm(psize, MMU_PAGE_16M)) + hugepage_block_invalidate(slot, vpn, count, psize, ssize); + else + hugepage_bulk_invalidate(slot, vpn, count, psize, ssize); + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize, int local) +{ + int i, index = 0; + unsigned long s_addr = addr; + unsigned int max_hpte_count, valid; + unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; + unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; + unsigned long shift, hidx, vpn = 0, hash, slot; + + shift = mmu_psize_defs[psize].shift; + max_hpte_count = 1U << (PMD_SHIFT - shift); + + for (i = 0; i < max_hpte_count; i++) { + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + + slot_array[index] = slot; + vpn_array[index] = vpn; + if (index == PPC64_HUGE_HPTE_BATCH - 1) { + /* + * Now do a bluk invalidate + */ + __pSeries_lpar_hugepage_invalidate(slot_array, + vpn_array, + PPC64_HUGE_HPTE_BATCH, + psize, ssize); + index = 0; + } else + index++; + } + if (index) + __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array, + index, psize, ssize); +} +#else +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize, int local) +{ + WARN(1, "%s called without THP support\n", __func__); +} +#endif + +static int pSeries_lpar_hpte_removebolted(unsigned long ea, + int psize, int ssize) +{ + unsigned long vpn; + unsigned long slot, vsid; + + vsid = get_kernel_vsid(ea, ssize); + vpn = hpt_vpn(ea, vsid, ssize); + + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); + if (slot == -1) + return -ENOENT; + + /* + * lpar doesn't use the passed actual page size + */ + pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); + return 0; +} + + +static inline unsigned long compute_slot(real_pte_t pte, + unsigned long vpn, + unsigned long index, + unsigned long shift, + int ssize) +{ + unsigned long slot, hash, hidx; + + hash = hpt_hash(vpn, shift, ssize); + hidx = __rpte_to_hidx(pte, index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + return slot; +} + +/** + * The hcall H_BLOCK_REMOVE implies that the virtual pages to processed are + * "all within the same naturally aligned 8 page virtual address block". + */ +static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch, + unsigned long *param) +{ + unsigned long vpn; + unsigned long i, pix = 0; + unsigned long index, shift, slot, current_vpgb, vpgb; + real_pte_t pte; + int psize, ssize; + + psize = batch->psize; + ssize = batch->ssize; + + for (i = 0; i < number; i++) { + vpn = batch->vpn[i]; + pte = batch->pte[i]; + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + /* + * Shifting 3 bits more on the right to get a + * 8 pages aligned virtual addresse. + */ + vpgb = (vpn >> (shift - VPN_SHIFT + 3)); + if (!pix || vpgb != current_vpgb) { + /* + * Need to start a new 8 pages block, flush + * the current one if needed. + */ + if (pix) + (void)call_block_remove(pix, param, + true); + current_vpgb = vpgb; + param[0] = hpte_encode_avpn(vpn, psize, + ssize); + pix = 1; + } + + slot = compute_slot(pte, vpn, index, shift, ssize); + param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot; + + if (pix == PLPAR_HCALL9_BUFSIZE) { + pix = call_block_remove(pix, param, false); + /* + * pix = 0 means that all the entries were + * removed, we can start a new block. + * Otherwise, this means that there are entries + * to retry, and pix points to latest one, so + * we should increment it and try to continue + * the same block. + */ + if (pix) + pix++; + } + } pte_iterate_hashed_end(); + } + + if (pix) + (void)call_block_remove(pix, param, true); +} + +/* + * TLB Block Invalidate Characteristics + * + * These characteristics define the size of the block the hcall H_BLOCK_REMOVE + * is able to process for each couple segment base page size, actual page size. + * + * The ibm,get-system-parameter properties is returning a buffer with the + * following layout: + * + * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ] + * ----------------- + * TLB Block Invalidate Specifiers: + * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ] + * [ 1 byte Number of page sizes (N) that are supported for the specified + * TLB invalidate block size ] + * [ 1 byte Encoded segment base page size and actual page size + * MSB=0 means 4k segment base page size and actual page size + * MSB=1 the penc value in mmu_psize_def ] + * ... + * ----------------- + * Next TLB Block Invalidate Specifiers... + * ----------------- + * [ 0 ] + */ +static inline void set_hblkrm_bloc_size(int bpsize, int psize, + unsigned int block_size) +{ + if (block_size > hblkrm_size[bpsize][psize]) + hblkrm_size[bpsize][psize] = block_size; +} + +/* + * Decode the Encoded segment base page size and actual page size. + * PAPR specifies: + * - bit 7 is the L bit + * - bits 0-5 are the penc value + * If the L bit is 0, this means 4K segment base page size and actual page size + * otherwise the penc value should be read. + */ +#define HBLKRM_L_MASK 0x80 +#define HBLKRM_PENC_MASK 0x3f +static inline void __init check_lp_set_hblkrm(unsigned int lp, + unsigned int block_size) +{ + unsigned int bpsize, psize; + + /* First, check the L bit, if not set, this means 4K */ + if ((lp & HBLKRM_L_MASK) == 0) { + set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size); + return; + } + + lp &= HBLKRM_PENC_MASK; + for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) { + struct mmu_psize_def *def = &mmu_psize_defs[bpsize]; + + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { + if (def->penc[psize] == lp) { + set_hblkrm_bloc_size(bpsize, psize, block_size); + return; + } + } + } +} + +/* + * The size of the TLB Block Invalidate Characteristics is variable. But at the + * maximum it will be the number of possible page sizes *2 + 10 bytes. + * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size + * (128 bytes) for the buffer to get plenty of space. + */ +#define SPLPAR_TLB_BIC_MAXLENGTH 128 + +void __init pseries_lpar_read_hblkrm_characteristics(void) +{ + static struct papr_sysparm_buf buf __initdata; + int len, idx, bpsize; + + if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) + return; + + if (papr_sysparm_get(PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS, &buf)) + return; + + len = be16_to_cpu(buf.len); + if (len > SPLPAR_TLB_BIC_MAXLENGTH) { + pr_warn("%s too large returned buffer %d", __func__, len); + return; + } + + idx = 0; + while (idx < len) { + u8 block_shift = buf.val[idx++]; + u32 block_size; + unsigned int npsize; + + if (!block_shift) + break; + + block_size = 1 << block_shift; + + for (npsize = buf.val[idx++]; + npsize > 0 && idx < len; npsize--) + check_lp_set_hblkrm((unsigned int)buf.val[idx++], + block_size); + } + + for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) + for (idx = 0; idx < MMU_PAGE_COUNT; idx++) + if (hblkrm_size[bpsize][idx]) + pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d", + bpsize, idx, hblkrm_size[bpsize][idx]); +} + +/* + * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie + * lock. + */ +static void pSeries_lpar_flush_hash_range(unsigned long number, int local) +{ + unsigned long vpn; + unsigned long i, pix, rc; + unsigned long flags = 0; + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + unsigned long param[PLPAR_HCALL9_BUFSIZE]; + unsigned long index, shift, slot; + real_pte_t pte; + int psize, ssize; + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + if (is_supported_hlbkrm(batch->psize, batch->psize)) { + do_block_remove(number, batch, param); + goto out; + } + + psize = batch->psize; + ssize = batch->ssize; + pix = 0; + for (i = 0; i < number; i++) { + vpn = batch->vpn[i]; + pte = batch->pte[i]; + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + slot = compute_slot(pte, vpn, index, shift, ssize); + if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + /* + * lpar doesn't use the passed actual page size + */ + pSeries_lpar_hpte_invalidate(slot, vpn, psize, + 0, ssize, local); + } else { + param[pix] = HBR_REQUEST | HBR_AVPN | slot; + param[pix+1] = hpte_encode_avpn(vpn, psize, + ssize); + pix += 2; + if (pix == 8) { + rc = plpar_hcall9(H_BULK_REMOVE, param, + param[0], param[1], param[2], + param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + pix = 0; + } + } + } pte_iterate_hashed_end(); + } + if (pix) { + param[pix] = HBR_END; + rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], + param[2], param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + } + +out: + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +static int __init disable_bulk_remove(char *str) +{ + if (strcmp(str, "off") == 0 && + firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + pr_info("Disabling BULK_REMOVE firmware feature"); + powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; + } + return 1; +} + +__setup("bulk_remove=", disable_bulk_remove); + +#define HPT_RESIZE_TIMEOUT 10000 /* ms */ + +struct hpt_resize_state { + unsigned long shift; + int commit_rc; +}; + +static int pseries_lpar_resize_hpt_commit(void *data) +{ + struct hpt_resize_state *state = data; + + state->commit_rc = plpar_resize_hpt_commit(0, state->shift); + if (state->commit_rc != H_SUCCESS) + return -EIO; + + /* Hypervisor has transitioned the HTAB, update our globals */ + ppc64_pft_size = state->shift; + htab_size_bytes = 1UL << ppc64_pft_size; + htab_hash_mask = (htab_size_bytes >> 7) - 1; + + return 0; +} + +/* + * Must be called in process context. The caller must hold the + * cpus_lock. + */ +static int pseries_lpar_resize_hpt(unsigned long shift) +{ + struct hpt_resize_state state = { + .shift = shift, + .commit_rc = H_FUNCTION, + }; + unsigned int delay, total_delay = 0; + int rc; + ktime_t t0, t1, t2; + + might_sleep(); + + if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + return -ENODEV; + + pr_info("Attempting to resize HPT to shift %lu\n", shift); + + t0 = ktime_get(); + + rc = plpar_resize_hpt_prepare(0, shift); + while (H_IS_LONG_BUSY(rc)) { + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > HPT_RESIZE_TIMEOUT) { + /* prepare with shift==0 cancels an in-progress resize */ + rc = plpar_resize_hpt_prepare(0, 0); + if (rc != H_SUCCESS) + pr_warn("Unexpected error %d cancelling timed out HPT resize\n", + rc); + return -ETIMEDOUT; + } + msleep(delay); + rc = plpar_resize_hpt_prepare(0, shift); + } + + switch (rc) { + case H_SUCCESS: + /* Continue on */ + break; + + case H_PARAMETER: + pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n"); + return -EINVAL; + case H_RESOURCE: + pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n"); + return -EPERM; + default: + pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); + return -EIO; + } + + t1 = ktime_get(); + + rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit, + &state, NULL); + + t2 = ktime_get(); + + if (rc != 0) { + switch (state.commit_rc) { + case H_PTEG_FULL: + return -ENOSPC; + + default: + pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n", + state.commit_rc); + return -EIO; + }; + } + + pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n", + shift, (long long) ktime_ms_delta(t1, t0), + (long long) ktime_ms_delta(t2, t1)); + + return 0; +} + +void __init hpte_init_pseries(void) +{ + mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = pSeries_lpar_hpte_insert; + mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove; + mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted; + mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; + mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; + mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; + + if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + /* + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall + * to inform the hypervisor that we wish to use the HPT. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pseries_lpar_register_process_table(0, 0, 0); +} +#endif /* CONFIG_PPC_64S_HASH_MMU */ + +#ifdef CONFIG_PPC_RADIX_MMU +void __init radix_init_pseries(void) +{ + pr_info("Using radix MMU under hypervisor\n"); + + pseries_lpar_register_process_table(__pa(process_tb), + 0, PRTB_SIZE_SHIFT - 12); +} +#endif + +#ifdef CONFIG_PPC_SMLPAR +#define CMO_FREE_HINT_DEFAULT 1 +static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; + +static int __init cmo_free_hint(char *str) +{ + char *parm; + parm = strstrip(str); + + if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { + pr_info("%s: CMO free page hinting is not active.\n", __func__); + cmo_free_hint_flag = 0; + return 1; + } + + cmo_free_hint_flag = 1; + pr_info("%s: CMO free page hinting is active.\n", __func__); + + if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) + return 1; + + return 0; +} + +__setup("cmo_free_hint=", cmo_free_hint); + +static void pSeries_set_page_state(struct page *page, int order, + unsigned long state) +{ + int i, j; + unsigned long cmo_page_sz, addr; + + cmo_page_sz = cmo_get_page_size(); + addr = __pa((unsigned long)page_address(page)); + + for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) { + for (j = 0; j < PAGE_SIZE; j += cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0); + } +} + +void arch_free_page(struct page *page, int order) +{ + if (radix_enabled()) + return; + if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) + return; + + pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED); +} +EXPORT_SYMBOL(arch_free_page); + +#endif /* CONFIG_PPC_SMLPAR */ +#endif /* CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_TRACEPOINTS +#ifdef CONFIG_JUMP_LABEL +struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; + +int hcall_tracepoint_regfunc(void) +{ + static_key_slow_inc(&hcall_tracepoint_key); + return 0; +} + +void hcall_tracepoint_unregfunc(void) +{ + static_key_slow_dec(&hcall_tracepoint_key); +} +#else +/* + * We optimise our hcall path by placing hcall_tracepoint_refcount + * directly in the TOC so we can check if the hcall tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long hcall_tracepoint_refcount; + +int hcall_tracepoint_regfunc(void) +{ + hcall_tracepoint_refcount++; + return 0; +} + +void hcall_tracepoint_unregfunc(void) +{ + hcall_tracepoint_refcount--; +} +#endif + +/* + * Keep track of hcall tracing depth and prevent recursion. Warn if any is + * detected because it may indicate a problem. This will not catch all + * problems with tracing code making hcalls, because the tracing might have + * been invoked from a non-hcall, so the first hcall could recurse into it + * without warning here, but this better than nothing. + * + * Hcalls with specific problems being traced should use the _notrace + * plpar_hcall variants. + */ +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); + + +notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = this_cpu_ptr(&hcall_trace_depth); + + if (WARN_ON_ONCE(*depth)) + goto out; + + (*depth)++; + preempt_disable(); + trace_hcall_entry(opcode, args); + (*depth)--; + +out: + local_irq_restore(flags); +} + +notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + + depth = this_cpu_ptr(&hcall_trace_depth); + + if (*depth) /* Don't warn again on the way out */ + goto out; + + (*depth)++; + trace_hcall_exit(opcode, retval, retbuf); + preempt_enable(); + (*depth)--; + +out: + local_irq_restore(flags); +} +#endif + +/** + * h_get_mpp + * H_GET_MPP hcall returns info in 7 parms + */ +int h_get_mpp(struct hvcall_mpp_data *mpp_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_MPP, retbuf); + + mpp_data->entitled_mem = retbuf[0]; + mpp_data->mapped_mem = retbuf[1]; + + mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + mpp_data->pool_num = retbuf[2] & 0xffff; + + mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; + mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL; + + mpp_data->pool_size = retbuf[4]; + mpp_data->loan_request = retbuf[5]; + mpp_data->backing_mem = retbuf[6]; + + return rc; +} +EXPORT_SYMBOL(h_get_mpp); + +int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; + + rc = plpar_hcall9(H_GET_MPP_X, retbuf); + + mpp_x_data->coalesced_bytes = retbuf[0]; + mpp_x_data->pool_coalesced_bytes = retbuf[1]; + mpp_x_data->pool_purr_cycles = retbuf[2]; + mpp_x_data->pool_spurr_cycles = retbuf[3]; + + return rc; +} + +#ifdef CONFIG_PPC_64S_HASH_MMU +static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize) +{ + unsigned long protovsid; + unsigned long va_bits = VA_BITS; + unsigned long modinv, vsid_modulus; + unsigned long max_mod_inv, tmp_modinv; + + if (!mmu_has_feature(MMU_FTR_68_BIT_VA)) + va_bits = 65; + + if (ssize == MMU_SEGSIZE_256M) { + modinv = VSID_MULINV_256M; + vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1); + } else { + modinv = VSID_MULINV_1T; + vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1); + } + + /* + * vsid outside our range. + */ + if (vsid >= vsid_modulus) + return 0; + + /* + * If modinv is the modular multiplicate inverse of (x % vsid_modulus) + * and vsid = (protovsid * x) % vsid_modulus, then we say: + * protovsid = (vsid * modinv) % vsid_modulus + */ + + /* Check if (vsid * modinv) overflow (63 bits) */ + max_mod_inv = 0x7fffffffffffffffull / vsid; + if (modinv < max_mod_inv) + return (vsid * modinv) % vsid_modulus; + + tmp_modinv = modinv/max_mod_inv; + modinv %= max_mod_inv; + + protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus; + protovsid = (protovsid + vsid * modinv) % vsid_modulus; + + return protovsid; +} + +static int __init reserve_vrma_context_id(void) +{ + unsigned long protovsid; + + /* + * Reserve context ids which map to reserved virtual addresses. For now + * we only reserve the context id which maps to the VRMA VSID. We ignore + * the addresses in "ibm,adjunct-virtual-addresses" because we don't + * enable adjunct support via the "ibm,client-architecture-support" + * interface. + */ + protovsid = vsid_unscramble(VRMA_VSID, MMU_SEGSIZE_1T); + hash__reserve_context_id(protovsid >> ESID_BITS_1T); + return 0; +} +machine_device_initcall(pseries, reserve_vrma_context_id); +#endif + +#ifdef CONFIG_DEBUG_FS +/* debugfs file interface for vpa data */ +static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + int cpu = (long)filp->private_data; + struct lppaca *lppaca = &lppaca_of(cpu); + + return simple_read_from_buffer(buf, len, pos, lppaca, + sizeof(struct lppaca)); +} + +static const struct file_operations vpa_fops = { + .open = simple_open, + .read = vpa_file_read, + .llseek = default_llseek, +}; + +static int __init vpa_debugfs_init(void) +{ + char name[16]; + long i; + struct dentry *vpa_dir; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir); + + /* set up the per-cpu vpa file*/ + for_each_possible_cpu(i) { + sprintf(name, "cpu-%ld", i); + debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops); + } + + return 0; +} +machine_arch_initcall(pseries, vpa_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c new file mode 100644 index 0000000000..1c151d77e7 --- /dev/null +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -0,0 +1,802 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerPC64 LPAR Configuration Information Driver + * + * Dave Engebretsen engebret@us.ibm.com + * Copyright (c) 2003 Dave Engebretsen + * Will Schmidt willschm@us.ibm.com + * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. + * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. + * Nathan Lynch nathanl@austin.ibm.com + * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. + * + * This driver creates a proc file at /proc/ppc64/lparcfg which contains + * keyword - value pairs that specify the configuration of the partition. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" +#include "vas.h" /* pseries_vas_dlpar_cpu() */ + +/* + * This isn't a module but we expose that to userspace + * via /proc so leave the definitions here + */ +#define MODULE_VERS "1.9" +#define MODULE_NAME "lparcfg" + +/* #define LPARCFG_DEBUG */ + +/* + * Track sum of all purrs across all processors. This is used to further + * calculate usage values by different applications + */ +static void cpu_get_purr(void *arg) +{ + atomic64_t *sum = arg; + + atomic64_add(mfspr(SPRN_PURR), sum); +} + +static unsigned long get_purr(void) +{ + atomic64_t purr = ATOMIC64_INIT(0); + + on_each_cpu(cpu_get_purr, &purr, 1); + + return atomic64_read(&purr); +} + +/* + * Methods used to fetch LPAR data when running on a pSeries platform. + */ + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; + u16 phys_platform_procs; + u32 max_proc_cap_avail; + u32 entitled_proc_cap_avail; +}; + +/* + * H_GET_PPP hcall returns info in 4 parms. + * entitled_capacity,unallocated_capacity, + * aggregation, resource_capability). + * + * R4 = Entitled Processor Capacity Percentage. + * R5 = Unallocated Processor Capacity Percentage. + * R6 (AABBCCDDEEFFGGHH). + * XXXX - reserved (0) + * XXXX - reserved (0) + * XXXX - Group Number + * XXXX - Pool Number. + * R7 (IIJJKKLLMMNNOOPP). + * XX - reserved. (0) + * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. + * XX - variable processor Capacity Weight + * XX - Unallocated Variable Processor Capacity Weight. + * XXXX - Active processors in Physical Processor Pool. + * XXXX - Processors active on platform. + * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1 + * XXXX - Physical platform procs allocated to virtualization. + * XXXXXX - Max procs capacity % available to the partitions pool. + * XXXXXX - Entitled procs capacity % available to the + * partitions pool. + */ +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_PPP, retbuf); + + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; + + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; + + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; + + ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8; + ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff; + ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff; + + return rc; +} + +static void show_gpci_data(struct seq_file *m) +{ + struct hv_gpci_request_buffer *buf; + unsigned int affinity_score; + long ret; + + buf = kmalloc(sizeof(*buf), GFP_KERNEL); + if (buf == NULL) + return; + + /* + * Show the local LPAR's affinity score. + * + * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall. + * The score is at byte 0xB in the output buffer. + */ + memset(&buf->params, 0, sizeof(buf->params)); + buf->params.counter_request = cpu_to_be32(0xB1); + buf->params.starting_index = cpu_to_be32(-1); /* local LPAR */ + buf->params.counter_info_version_in = 0x5; /* v5+ for score */ + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(buf), + sizeof(*buf)); + if (ret != H_SUCCESS) { + pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n", + ret, be32_to_cpu(buf->params.detail_rc)); + goto out; + } + affinity_score = buf->bytes[0xB]; + seq_printf(m, "partition_affinity_score=%u\n", affinity_score); +out: + kfree(buf); +} + +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_PIC, retbuf); + + *pool_idle_time = retbuf[0]; + *num_procs = retbuf[1]; + + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + struct device_node *root; + const __be32 *perf_level; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%lld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are appropriate for shared configs */ + if (lppaca_shared_proc()) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%lld\n", + ppp_data.unallocated_entitlement); + + /* The last bits of information returned from h_get_ppp are only + * valid if the ibm,partition-performance-parameters-level + * property is >= 1. + */ + root = of_find_node_by_path("/"); + if (root) { + perf_level = of_get_property(root, + "ibm,partition-performance-parameters-level", + NULL); + if (perf_level && (be32_to_cpup(perf_level) >= 1)) { + seq_printf(m, + "physical_procs_allocated_to_virtualization=%d\n", + ppp_data.phys_platform_procs); + seq_printf(m, "max_proc_capacity_available=%d\n", + ppp_data.max_proc_cap_avail); + seq_printf(m, "entitled_proc_capacity_available=%d\n", + ppp_data.entitled_proc_cap_avail); + } + + of_node_put(root); + } +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); +} + +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + +/* + * Read the lpar name using the RTAS ibm,get-system-parameter call. + * + * The name read through this call is updated if changes are made by the end + * user on the hypervisor side. + * + * Some hypervisor (like Qemu) may not provide this value. In that case, a non + * null value is returned. + */ +static int read_rtas_lpar_name(struct seq_file *m) +{ + struct papr_sysparm_buf *buf; + int err; + + buf = papr_sysparm_buf_alloc(); + if (!buf) + return -ENOMEM; + + err = papr_sysparm_get(PAPR_SYSPARM_LPAR_NAME, buf); + if (!err) + seq_printf(m, "partition_name=%s\n", buf->val); + + papr_sysparm_buf_free(buf); + return err; +} + +/* + * Read the LPAR name from the Device Tree. + * + * The value read in the DT is not updated if the end-user is touching the LPAR + * name on the hypervisor side. + */ +static int read_dt_lpar_name(struct seq_file *m) +{ + const char *name; + + if (of_property_read_string(of_root, "ibm,partition-name", &name)) + return -ENOENT; + + seq_printf(m, "partition_name=%s\n", name); + return 0; +} + +static void read_lpar_name(struct seq_file *m) +{ + if (read_rtas_lpar_name(m) && read_dt_lpar_name(m)) + pr_err_once("Error can't get the LPAR name"); +} + +#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) + +/* + * parse_system_parameter_string() + * Retrieve the potential_processors, max_entitled_capacity and friends + * through the get-system-parameter rtas call. Replace keyword strings as + * necessary. + */ +static void parse_system_parameter_string(struct seq_file *m) +{ + struct papr_sysparm_buf *buf; + + buf = papr_sysparm_buf_alloc(); + if (!buf) + return; + + if (papr_sysparm_get(PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS, buf)) { + goto out_free; + } else { + const char *local_buffer; + int splpar_strlen; + int idx, w_idx; + char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + + if (!workbuffer) + goto out_free; + + splpar_strlen = be16_to_cpu(buf->len); + local_buffer = buf->val; + + w_idx = 0; + idx = 0; + while ((*local_buffer) && (idx < splpar_strlen)) { + workbuffer[w_idx++] = local_buffer[idx++]; + if ((local_buffer[idx] == ',') + || (local_buffer[idx] == '\0')) { + workbuffer[w_idx] = '\0'; + if (w_idx) { + /* avoid the empty string */ + seq_printf(m, "%s\n", workbuffer); + } + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + idx++; /* skip the comma */ + w_idx = 0; + } else if (local_buffer[idx] == '=') { + /* code here to replace workbuffer contents + with different keyword strings */ + if (0 == strcmp(workbuffer, "MaxEntCap")) { + strcpy(workbuffer, + "partition_max_entitled_capacity"); + w_idx = strlen(workbuffer); + } + if (0 == strcmp(workbuffer, "MaxPlatProcs")) { + strcpy(workbuffer, + "system_potential_processors"); + w_idx = strlen(workbuffer); + } + } + } + kfree(workbuffer); + local_buffer -= 2; /* back up over strlen value */ + } +out_free: + papr_sysparm_buf_free(buf); +} + +/* Return the number of processors in the system. + * This function reads through the device tree and counts + * the virtual processors, this does not include threads. + */ +static int lparcfg_count_active_processors(void) +{ + struct device_node *cpus_dn; + int count = 0; + + for_each_node_by_type(cpus_dn, "cpu") { +#ifdef LPARCFG_DEBUG + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); +#endif + count++; + } + return count; +} + +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO)); + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += be64_to_cpu(lppaca_of(cpu).cmo_faults); + cmo_fault_time += be64_to_cpu(lppaca_of(cpu).cmo_fault_time); + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); + seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp()); + seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp()); + seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size()); +} + +static void splpar_dispatch_data(struct seq_file *m) +{ + int cpu; + unsigned long dispatches = 0; + unsigned long dispatch_dispersions = 0; + + for_each_possible_cpu(cpu) { + dispatches += be32_to_cpu(lppaca_of(cpu).yield_count); + dispatch_dispersions += + be32_to_cpu(lppaca_of(cpu).dispersion_count); + } + + seq_printf(m, "dispatches=%lu\n", dispatches); + seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); +} + +static void parse_em_data(struct seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (firmware_has_feature(FW_FEATURE_LPAR) && + plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + +static void maxmem_data(struct seq_file *m) +{ + unsigned long maxmem = 0; + + maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += hugetlb_total_pages() * PAGE_SIZE; + + seq_printf(m, "MaxMem=%lu\n", maxmem); +} + +static int pseries_lparcfg_data(struct seq_file *m, void *v) +{ + int partition_potential_processors; + int partition_active_processors; + struct device_node *rtas_node; + const __be32 *lrdrp = NULL; + + rtas_node = of_find_node_by_path("/rtas"); + if (rtas_node) + lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); + + if (lrdrp == NULL) { + partition_potential_processors = vdso_data->processorCount; + } else { + partition_potential_processors = be32_to_cpup(lrdrp + 4); + } + of_node_put(rtas_node); + + partition_active_processors = lparcfg_count_active_processors(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + /* this call handles the ibm,get-system-parameter contents */ + read_lpar_name(m); + parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + parse_mpp_x_data(m); + pseries_cmo_data(m); + splpar_dispatch_data(m); + + seq_printf(m, "purr=%ld\n", get_purr()); + seq_printf(m, "tbr=%ld\n", mftb()); + } else { /* non SPLPAR case */ + + seq_printf(m, "system_active_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "system_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + partition_potential_processors * 100); + + seq_printf(m, "partition_entitled_capacity=%d\n", + partition_active_processors * 100); + } + + show_gpci_data(m); + + seq_printf(m, "partition_active_processors=%d\n", + partition_active_processors); + + seq_printf(m, "partition_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "shared_processor_mode=%d\n", + lppaca_shared_proc()); + +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!radix_enabled()) + seq_printf(m, "slb_size=%d\n", mmu_slb_size); +#endif + parse_em_data(m); + maxmem_data(m); + + seq_printf(m, "security_flavor=%u\n", pseries_security_flavor); + + return 0; +} + +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight = ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %llu, current_weight = %u\n", + __func__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __func__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. + */ +static ssize_t lparcfg_write(struct file *file, const char __user * buf, + size_t count, loff_t * off) +{ + char kbuf[64]; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + ssize_t retval; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -EINVAL; + + if (count > sizeof(kbuf)) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + return -EINVAL; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); + + if (retval == H_SUCCESS || retval == H_CONSTRAINED) { + /* + * The hypervisor assigns VAS resources based + * on entitled capacity for shared mode. + * Reconfig VAS windows based on DLPAR CPU events. + */ + if (pseries_vas_dlpar_cpu() != 0) + retval = H_HARDWARE; + } + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; + + if (retval == H_SUCCESS || retval == H_CONSTRAINED) { + retval = count; + } else if (retval == H_BUSY) { + retval = -EBUSY; + } else if (retval == H_HARDWARE) { + retval = -EIO; + } else if (retval == H_PARAMETER) { + retval = -EINVAL; + } + + return retval; +} + +static int lparcfg_data(struct seq_file *m, void *v) +{ + struct device_node *rootdn; + const char *model = ""; + const char *system_id = ""; + const char *tmp; + const __be32 *lp_index_ptr; + unsigned int lp_index = 0; + + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); + + rootdn = of_find_node_by_path("/"); + if (rootdn) { + tmp = of_get_property(rootdn, "model", NULL); + if (tmp) + model = tmp; + tmp = of_get_property(rootdn, "system-id", NULL); + if (tmp) + system_id = tmp; + lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", + NULL); + if (lp_index_ptr) + lp_index = be32_to_cpup(lp_index_ptr); + of_node_put(rootdn); + } + seq_printf(m, "serial_number=%s\n", system_id); + seq_printf(m, "system_type=%s\n", model); + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + return pseries_lparcfg_data(m, v); +} + +static int lparcfg_open(struct inode *inode, struct file *file) +{ + return single_open(file, lparcfg_data, NULL); +} + +static const struct proc_ops lparcfg_proc_ops = { + .proc_read = seq_read, + .proc_write = lparcfg_write, + .proc_open = lparcfg_open, + .proc_release = single_release, + .proc_lseek = seq_lseek, +}; + +static int __init lparcfg_init(void) +{ + umode_t mode = 0444; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + mode |= 0200; + + if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops)) { + printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); + return -EIO; + } + return 0; +} +machine_device_initcall(pseries, lparcfg_init); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c new file mode 100644 index 0000000000..0161226d8f --- /dev/null +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -0,0 +1,830 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for Partition Mobility/Migration + * + * Copyright (C) 2010 Nathan Fontenot + * Copyright (C) 2010 IBM Corporation + */ + + +#define pr_fmt(fmt) "mobility: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "pseries.h" +#include "vas.h" /* vas_migration_handler() */ +#include "../../kernel/cacheinfo.h" + +static struct kobject *mobility_kobj; + +struct update_props_workarea { + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; +} __packed; + +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff + +#define DELETE_DT_NODE 0x01000000 +#define UPDATE_DT_NODE 0x02000000 +#define ADD_DT_NODE 0x03000000 + +#define MIGRATION_SCOPE (1) +#define PRRN_SCOPE -2 + +#ifdef CONFIG_PPC_WATCHDOG +static unsigned int nmi_wd_lpm_factor = 200; + +#ifdef CONFIG_SYSCTL +static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = { + { + .procname = "nmi_wd_lpm_factor", + .data = &nmi_wd_lpm_factor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + }, + {} +}; + +static int __init register_nmi_wd_lpm_factor_sysctl(void) +{ + register_sysctl("kernel", nmi_wd_lpm_factor_ctl_table); + + return 0; +} +device_initcall(register_nmi_wd_lpm_factor_sysctl); +#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_PPC_WATCHDOG */ + +static int mobility_rtas_call(int token, char *buf, s32 scope) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int delete_dt_node(struct device_node *dn) +{ + struct device_node *pdn; + bool is_platfac; + + pdn = of_get_parent(dn); + is_platfac = of_node_is_type(dn, "ibm,platform-facilities") || + of_node_is_type(pdn, "ibm,platform-facilities"); + of_node_put(pdn); + + /* + * The drivers that bind to nodes in the platform-facilities + * hierarchy don't support node removal, and the removal directive + * from firmware is always followed by an add of an equivalent + * node. The capability (e.g. RNG, encryption, compression) + * represented by the node is never interrupted by the migration. + * So ignore changes to this part of the tree. + */ + if (is_platfac) { + pr_notice("ignoring remove operation for %pOFfp\n", dn); + return 0; + } + + pr_debug("removing node %pOFfp\n", dn); + dlpar_detach_node(dn); + return 0; +} + +static int update_dt_property(struct device_node *dn, struct property **prop, + const char *name, u32 vd, char *value) +{ + struct property *new_prop = *prop; + int more = 0; + + /* A negative 'vd' value indicates that only part of the new property + * value is contained in the buffer and we need to call + * ibm,update-properties again to get the rest of the value. + * + * A negative value is also the two's compliment of the actual value. + */ + if (vd & 0x80000000) { + vd = ~vd + 1; + more = 1; + } + + if (new_prop) { + /* partial property fixup */ + char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL); + if (!new_data) + return -ENOMEM; + + memcpy(new_data, new_prop->value, new_prop->length); + memcpy(new_data + new_prop->length, value, vd); + + kfree(new_prop->value); + new_prop->value = new_data; + new_prop->length += vd; + } else { + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + + memcpy(new_prop->value, value, vd); + *prop = new_prop; + } + + if (!more) { + pr_debug("updating node %pOF property %s\n", dn, name); + of_update_property(dn, new_prop); + *prop = NULL; + } + + return 0; +} + +static int update_dt_node(struct device_node *dn, s32 scope) +{ + struct update_props_workarea *upwa; + struct property *prop = NULL; + int i, rc, rtas_rc; + char *prop_data; + char *rtas_buf; + int update_properties_token; + u32 nprops; + u32 vd; + + update_properties_token = rtas_function_token(RTAS_FN_IBM_UPDATE_PROPERTIES); + if (update_properties_token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + upwa = (struct update_props_workarea *)&rtas_buf[0]; + upwa->phandle = cpu_to_be32(dn->phandle); + + do { + rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf, + scope); + if (rtas_rc < 0) + break; + + prop_data = rtas_buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); + + /* On the first call to ibm,update-properties for a node the + * first property value descriptor contains an empty + * property name, the property value length encoded as u32, + * and the property value is the node path being updated. + */ + if (*prop_data == 0) { + prop_data++; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += vd + sizeof(vd); + nprops--; + } + + for (i = 0; i < nprops; i++) { + char *prop_name; + + prop_name = prop_data; + prop_data += strlen(prop_name) + 1; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += sizeof(vd); + + switch (vd) { + case 0x00000000: + /* name only property, nothing to do */ + break; + + case 0x80000000: + of_remove_property(dn, of_find_property(dn, + prop_name, NULL)); + prop = NULL; + break; + + default: + rc = update_dt_property(dn, &prop, prop_name, + vd, prop_data); + if (rc) { + pr_err("updating %s property failed: %d\n", + prop_name, rc); + } + + prop_data += vd; + break; + } + + cond_resched(); + } + + cond_resched(); + } while (rtas_rc == 1); + + kfree(rtas_buf); + return 0; +} + +static int add_dt_node(struct device_node *parent_dn, __be32 drc_index) +{ + struct device_node *dn; + int rc; + + dn = dlpar_configure_connector(drc_index, parent_dn); + if (!dn) + return -ENOENT; + + /* + * Since delete_dt_node() ignores this node type, this is the + * necessary counterpart. We also know that a platform-facilities + * node returned from dlpar_configure_connector() has children + * attached, and dlpar_attach_node() only adds the parent, leaking + * the children. So ignore these on the add side for now. + */ + if (of_node_is_type(dn, "ibm,platform-facilities")) { + pr_notice("ignoring add operation for %pOF\n", dn); + dlpar_free_cc_nodes(dn); + return 0; + } + + rc = dlpar_attach_node(dn, parent_dn); + if (rc) + dlpar_free_cc_nodes(dn); + + pr_debug("added node %pOFfp\n", dn); + + return rc; +} + +static int pseries_devicetree_update(s32 scope) +{ + char *rtas_buf; + __be32 *data; + int update_nodes_token; + int rc; + + update_nodes_token = rtas_function_token(RTAS_FN_IBM_UPDATE_NODES); + if (update_nodes_token == RTAS_UNKNOWN_SERVICE) + return 0; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + do { + rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope); + if (rc && rc != 1) + break; + + data = (__be32 *)rtas_buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { + int i; + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; + + data++; + + for (i = 0; i < node_count; i++) { + struct device_node *np; + __be32 phandle = *data++; + __be32 drc_index; + + np = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!np) { + pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n", + be32_to_cpu(phandle), action); + continue; + } + + switch (action) { + case DELETE_DT_NODE: + delete_dt_node(np); + break; + case UPDATE_DT_NODE: + update_dt_node(np, scope); + break; + case ADD_DT_NODE: + drc_index = *data++; + add_dt_node(np, drc_index); + break; + } + + of_node_put(np); + cond_resched(); + } + } + + cond_resched(); + } while (rc == 1); + + kfree(rtas_buf); + return rc; +} + +void post_mobility_fixup(void) +{ + int rc; + + rtas_activate_firmware(); + + /* + * We don't want CPUs to go online/offline while the device + * tree is being updated. + */ + cpus_read_lock(); + + /* + * It's common for the destination firmware to replace cache + * nodes. Release all of the cacheinfo hierarchy's references + * before updating the device tree. + */ + cacheinfo_teardown(); + + rc = pseries_devicetree_update(MIGRATION_SCOPE); + if (rc) + pr_err("device tree update failed: %d\n", rc); + + cacheinfo_rebuild(); + + cpus_read_unlock(); + + /* Possibly switch to a new L1 flush type */ + pseries_setup_security_mitigations(); + + /* Reinitialise system information for hv-24x7 */ + read_24x7_sys_info(); + + return; +} + +static int poll_vasi_state(u64 handle, unsigned long *res) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long hvrc; + int ret; + + hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle); + switch (hvrc) { + case H_SUCCESS: + ret = 0; + *res = retbuf[0]; + break; + case H_PARAMETER: + ret = -EINVAL; + break; + case H_FUNCTION: + ret = -EOPNOTSUPP; + break; + case H_HARDWARE: + default: + pr_err("unexpected H_VASI_STATE result %ld\n", hvrc); + ret = -EIO; + break; + } + return ret; +} + +static int wait_for_vasi_session_suspending(u64 handle) +{ + unsigned long state; + int ret; + + /* + * Wait for transition from H_VASI_ENABLED to + * H_VASI_SUSPENDING. Treat anything else as an error. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + if (ret != 0 || state == H_VASI_SUSPENDING) { + break; + } else if (state == H_VASI_ENABLED) { + ssleep(1); + } else { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + ret = -EIO; + break; + } + } + + /* + * Proceed even if H_VASI_STATE is unavailable. If H_JOIN or + * ibm,suspend-me are also unimplemented, we'll recover then. + */ + if (ret == -EOPNOTSUPP) + ret = 0; + + return ret; +} + +static void wait_for_vasi_session_completed(u64 handle) +{ + unsigned long state = 0; + int ret; + + pr_info("waiting for memory transfer to complete...\n"); + + /* + * Wait for transition from H_VASI_RESUMED to H_VASI_COMPLETED. + */ + while (true) { + ret = poll_vasi_state(handle, &state); + + /* + * If the memory transfer is already complete and the migration + * has been cleaned up by the hypervisor, H_PARAMETER is return, + * which is translate in EINVAL by poll_vasi_state(). + */ + if (ret == -EINVAL || (!ret && state == H_VASI_COMPLETED)) { + pr_info("memory transfer completed.\n"); + break; + } + + if (ret) { + pr_err("H_VASI_STATE return error (%d)\n", ret); + break; + } + + if (state != H_VASI_RESUMED) { + pr_err("unexpected H_VASI_STATE result %lu\n", state); + break; + } + + msleep(500); + } +} + +static void prod_single(unsigned int target_cpu) +{ + long hvrc; + int hwid; + + hwid = get_hard_smp_processor_id(target_cpu); + hvrc = plpar_hcall_norets(H_PROD, hwid); + if (hvrc == H_SUCCESS) + return; + pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n", + target_cpu, hwid, hvrc); +} + +static void prod_others(void) +{ + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + prod_single(cpu); + } +} + +static u16 clamp_slb_size(void) +{ +#ifdef CONFIG_PPC_64S_HASH_MMU + u16 prev = mmu_slb_size; + + slb_set_size(SLB_MIN_SIZE); + + return prev; +#else + return 0; +#endif +} + +static int do_suspend(void) +{ + u16 saved_slb_size; + int status; + int ret; + + pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id()); + + /* + * The destination processor model may have fewer SLB entries + * than the source. We reduce mmu_slb_size to a safe minimum + * before suspending in order to minimize the possibility of + * programming non-existent entries on the destination. If + * suspend fails, we restore it before returning. On success + * the OF reconfig path will update it from the new device + * tree after resuming on the destination. + */ + saved_slb_size = clamp_slb_size(); + + ret = rtas_ibm_suspend_me(&status); + if (ret != 0) { + pr_err("ibm,suspend-me error: %d\n", status); + slb_set_size(saved_slb_size); + } + + return ret; +} + +/** + * struct pseries_suspend_info - State shared between CPUs for join/suspend. + * @counter: Threads are to increment this upon resuming from suspend + * or if an error is received from H_JOIN. The thread which performs + * the first increment (i.e. sets it to 1) is responsible for + * waking the other threads. + * @done: False if join/suspend is in progress. True if the operation is + * complete (successful or not). + */ +struct pseries_suspend_info { + atomic_t counter; + bool done; +}; + +static int do_join(void *arg) +{ + struct pseries_suspend_info *info = arg; + atomic_t *counter = &info->counter; + long hvrc; + int ret; + +retry: + /* Must ensure MSR.EE off for H_JOIN. */ + hard_irq_disable(); + hvrc = plpar_hcall_norets(H_JOIN); + + switch (hvrc) { + case H_CONTINUE: + /* + * All other CPUs are offline or in H_JOIN. This CPU + * attempts the suspend. + */ + ret = do_suspend(); + break; + case H_SUCCESS: + /* + * The suspend is complete and this cpu has received a + * prod, or we've received a stray prod from unrelated + * code (e.g. paravirt spinlocks) and we need to join + * again. + * + * This barrier orders the return from H_JOIN above vs + * the load of info->done. It pairs with the barrier + * in the wakeup/prod path below. + */ + smp_mb(); + if (READ_ONCE(info->done) == false) { + pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying", + smp_processor_id()); + goto retry; + } + ret = 0; + break; + case H_BAD_MODE: + case H_HARDWARE: + default: + ret = -EIO; + pr_err_ratelimited("H_JOIN error %ld on CPU %i\n", + hvrc, smp_processor_id()); + break; + } + + if (atomic_inc_return(counter) == 1) { + pr_info("CPU %u waking all threads\n", smp_processor_id()); + WRITE_ONCE(info->done, true); + /* + * This barrier orders the store to info->done vs subsequent + * H_PRODs to wake the other CPUs. It pairs with the barrier + * in the H_SUCCESS case above. + */ + smp_mb(); + prod_others(); + } + /* + * Execution may have been suspended for several seconds, so reset + * the watchdogs. touch_nmi_watchdog() also touches the soft lockup + * watchdog. + */ + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); + + return ret; +} + +/* + * Abort reason code byte 0. We use only the 'Migrating partition' value. + */ +enum vasi_aborting_entity { + ORCHESTRATOR = 1, + VSP_SOURCE = 2, + PARTITION_FIRMWARE = 3, + PLATFORM_FIRMWARE = 4, + VSP_TARGET = 5, + MIGRATING_PARTITION = 6, +}; + +static void pseries_cancel_migration(u64 handle, int err) +{ + u32 reason_code; + u32 detail; + u8 entity; + long hvrc; + + entity = MIGRATING_PARTITION; + detail = abs(err) & 0xffffff; + reason_code = (entity << 24) | detail; + + hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle, + H_VASI_SIGNAL_CANCEL, reason_code); + if (hvrc) + pr_err("H_VASI_SIGNAL error: %ld\n", hvrc); +} + +static int pseries_suspend(u64 handle) +{ + const unsigned int max_attempts = 5; + unsigned int retry_interval_ms = 1; + unsigned int attempt = 1; + int ret; + + while (true) { + struct pseries_suspend_info info; + unsigned long vasi_state; + int vasi_err; + + info = (struct pseries_suspend_info) { + .counter = ATOMIC_INIT(0), + .done = false, + }; + + ret = stop_machine(do_join, &info, cpu_online_mask); + if (ret == 0) + break; + /* + * Encountered an error. If the VASI stream is still + * in Suspending state, it's likely a transient + * condition related to some device in the partition + * and we can retry in the hope that the cause has + * cleared after some delay. + * + * A better design would allow drivers etc to prepare + * for the suspend and avoid conditions which prevent + * the suspend from succeeding. For now, we have this + * mitigation. + */ + pr_notice("Partition suspend attempt %u of %u error: %d\n", + attempt, max_attempts, ret); + + if (attempt == max_attempts) + break; + + vasi_err = poll_vasi_state(handle, &vasi_state); + if (vasi_err == 0) { + if (vasi_state != H_VASI_SUSPENDING) { + pr_notice("VASI state %lu after failed suspend\n", + vasi_state); + break; + } + } else if (vasi_err != -EOPNOTSUPP) { + pr_err("VASI state poll error: %d", vasi_err); + break; + } + + pr_notice("Will retry partition suspend after %u ms\n", + retry_interval_ms); + + msleep(retry_interval_ms); + retry_interval_ms *= 10; + attempt++; + } + + return ret; +} + +static int pseries_migrate_partition(u64 handle) +{ + int ret; + unsigned int factor = 0; + +#ifdef CONFIG_PPC_WATCHDOG + factor = nmi_wd_lpm_factor; +#endif + /* + * When the migration is initiated, the hypervisor changes VAS + * mappings to prepare before OS gets the notification and + * closes all VAS windows. NX generates continuous faults during + * this time and the user space can not differentiate these + * faults from the migration event. So reduce this time window + * by closing VAS windows at the beginning of this function. + */ + vas_migration_handler(VAS_SUSPEND); + + ret = wait_for_vasi_session_suspending(handle); + if (ret) + goto out; + + if (factor) + watchdog_hardlockup_set_timeout_pct(factor); + + ret = pseries_suspend(handle); + if (ret == 0) { + post_mobility_fixup(); + /* + * Wait until the memory transfer is complete, so that the user + * space process returns from the syscall after the transfer is + * complete. This allows the user hooks to be executed at the + * right time. + */ + wait_for_vasi_session_completed(handle); + } else + pseries_cancel_migration(handle, ret); + + if (factor) + watchdog_hardlockup_set_timeout_pct(0); + +out: + vas_migration_handler(VAS_RESUME); + + return ret; +} + +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle) +{ + return pseries_migrate_partition(handle); +} + +static ssize_t migration_store(const struct class *class, + const struct class_attribute *attr, const char *buf, + size_t count) +{ + u64 streamid; + int rc; + + rc = kstrtou64(buf, 0, &streamid); + if (rc) + return rc; + + rc = pseries_migrate_partition(streamid); + if (rc) + return rc; + + return count; +} + +/* + * Used by drmgr to determine the kernel behavior of the migration interface. + * + * Version 1: Performs all PAPR requirements for migration including + * firmware activation and device tree update. + */ +#define MIGRATION_API_VERSION 1 + +static CLASS_ATTR_WO(migration); +static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION)); + +static int __init mobility_sysfs_init(void) +{ + int rc; + + mobility_kobj = kobject_create_and_add("mobility", kernel_kobj); + if (!mobility_kobj) + return -ENOMEM; + + rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); + if (rc) + pr_err("unable to create migration sysfs file (%d)\n", rc); + + rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr); + if (rc) + pr_err("unable to create api_version sysfs file (%d)\n", rc); + + return 0; +} +machine_device_initcall(pseries, mobility_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c new file mode 100644 index 0000000000..423ee1d5bd --- /dev/null +++ b/arch/powerpc/platforms/pseries/msi.c @@ -0,0 +1,698 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2006 Jake Moilanen , IBM Corp. + * Copyright 2006-2007 Michael Ellerman, IBM Corp. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pseries.h" + +static int query_token, change_token; + +#define RTAS_QUERY_FN 0 +#define RTAS_CHANGE_FN 1 +#define RTAS_RESET_FN 2 +#define RTAS_CHANGE_MSI_FN 3 +#define RTAS_CHANGE_MSIX_FN 4 +#define RTAS_CHANGE_32MSI_FN 5 + +/* RTAS Helpers */ + +static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) +{ + u32 addr, seq_num, rtas_ret[3]; + unsigned long buid; + int rc; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + + seq_num = 1; + do { + if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN || + func == RTAS_CHANGE_32MSI_FN) + rc = rtas_call(change_token, 6, 4, rtas_ret, addr, + BUID_HI(buid), BUID_LO(buid), + func, num_irqs, seq_num); + else + rc = rtas_call(change_token, 6, 3, rtas_ret, addr, + BUID_HI(buid), BUID_LO(buid), + func, num_irqs, seq_num); + + seq_num = rtas_ret[1]; + } while (rtas_busy_delay(rc)); + + /* + * If the RTAS call succeeded, return the number of irqs allocated. + * If not, make sure we return a negative error code. + */ + if (rc == 0) + rc = rtas_ret[0]; + else if (rc > 0) + rc = -rc; + + pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n", + func, num_irqs, rtas_ret[0], rc); + + return rc; +} + +static void rtas_disable_msi(struct pci_dev *pdev) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + if (!pdn) + return; + + /* + * disabling MSI with the explicit interface also disables MSI-X + */ + if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) { + /* + * may have failed because explicit interface is not + * present + */ + if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) { + pr_debug("rtas_msi: Setting MSIs to 0 failed!\n"); + } + } +} + +static int rtas_query_irq_number(struct pci_dn *pdn, int offset) +{ + u32 addr, rtas_ret[2]; + unsigned long buid; + int rc; + + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + + do { + rc = rtas_call(query_token, 4, 3, rtas_ret, addr, + BUID_HI(buid), BUID_LO(buid), offset); + } while (rtas_busy_delay(rc)); + + if (rc) { + pr_debug("rtas_msi: error (%d) querying source number\n", rc); + return rc; + } + + return rtas_ret[0]; +} + +static int check_req(struct pci_dev *pdev, int nvec, char *prop_name) +{ + struct device_node *dn; + const __be32 *p; + u32 req_msi; + + dn = pci_device_to_OF_node(pdev); + + p = of_get_property(dn, prop_name, NULL); + if (!p) { + pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn); + return -ENOENT; + } + + req_msi = be32_to_cpup(p); + if (req_msi < nvec) { + pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec); + + if (req_msi == 0) /* Be paranoid */ + return -ENOSPC; + + return req_msi; + } + + return 0; +} + +static int check_req_msi(struct pci_dev *pdev, int nvec) +{ + return check_req(pdev, nvec, "ibm,req#msi"); +} + +static int check_req_msix(struct pci_dev *pdev, int nvec) +{ + return check_req(pdev, nvec, "ibm,req#msi-x"); +} + +/* Quota calculation */ + +static struct device_node *__find_pe_total_msi(struct device_node *node, int *total) +{ + struct device_node *dn; + const __be32 *p; + + dn = of_node_get(node); + while (dn) { + p = of_get_property(dn, "ibm,pe-total-#msi", NULL); + if (p) { + pr_debug("rtas_msi: found prop on dn %pOF\n", + dn); + *total = be32_to_cpup(p); + return dn; + } + + dn = of_get_next_parent(dn); + } + + return NULL; +} + +static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) +{ + return __find_pe_total_msi(pci_device_to_OF_node(dev), total); +} + +static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) +{ + struct device_node *dn; + struct eeh_dev *edev; + + /* Found our PE and assume 8 at that point. */ + + dn = pci_device_to_OF_node(dev); + if (!dn) + return NULL; + + /* Get the top level device in the PE */ + edev = pdn_to_eeh_dev(PCI_DN(dn)); + if (edev->pe) + edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, + entry); + dn = pci_device_to_OF_node(edev->pdev); + if (!dn) + return NULL; + + /* We actually want the parent */ + dn = of_get_parent(dn); + if (!dn) + return NULL; + + /* Hardcode of 8 for old firmwares */ + *total = 8; + pr_debug("rtas_msi: using PE dn %pOF\n", dn); + + return dn; +} + +struct msi_counts { + struct device_node *requestor; + int num_devices; + int request; + int quota; + int spare; + int over_quota; +}; + +static void *count_non_bridge_devices(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const __be32 *p; + u32 class; + + pr_debug("rtas_msi: counting %pOF\n", dn); + + p = of_get_property(dn, "class-code", NULL); + class = p ? be32_to_cpup(p) : 0; + + if ((class >> 8) != PCI_CLASS_BRIDGE_PCI) + counts->num_devices++; + + return NULL; +} + +static void *count_spare_msis(struct device_node *dn, void *data) +{ + struct msi_counts *counts = data; + const __be32 *p; + int req; + + if (dn == counts->requestor) + req = counts->request; + else { + /* We don't know if a driver will try to use MSI or MSI-X, + * so we just have to punt and use the larger of the two. */ + req = 0; + p = of_get_property(dn, "ibm,req#msi", NULL); + if (p) + req = be32_to_cpup(p); + + p = of_get_property(dn, "ibm,req#msi-x", NULL); + if (p) + req = max(req, (int)be32_to_cpup(p)); + } + + if (req < counts->quota) + counts->spare += counts->quota - req; + else if (req > counts->quota) + counts->over_quota++; + + return NULL; +} + +static int msi_quota_for_device(struct pci_dev *dev, int request) +{ + struct device_node *pe_dn; + struct msi_counts counts; + int total; + + pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev), + request); + + pe_dn = find_pe_total_msi(dev, &total); + if (!pe_dn) + pe_dn = find_pe_dn(dev, &total); + + if (!pe_dn) { + pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev)); + goto out; + } + + pr_debug("rtas_msi: found PE %pOF\n", pe_dn); + + memset(&counts, 0, sizeof(struct msi_counts)); + + /* Work out how many devices we have below this PE */ + pci_traverse_device_nodes(pe_dn, count_non_bridge_devices, &counts); + + if (counts.num_devices == 0) { + pr_err("rtas_msi: found 0 devices under PE for %s\n", + pci_name(dev)); + goto out; + } + + counts.quota = total / counts.num_devices; + if (request <= counts.quota) + goto out; + + /* else, we have some more calculating to do */ + counts.requestor = pci_device_to_OF_node(dev); + counts.request = request; + pci_traverse_device_nodes(pe_dn, count_spare_msis, &counts); + + /* If the quota isn't an integer multiple of the total, we can + * use the remainder as spare MSIs for anyone that wants them. */ + counts.spare += total % counts.num_devices; + + /* Divide any spare by the number of over-quota requestors */ + if (counts.over_quota) + counts.quota += counts.spare / counts.over_quota; + + /* And finally clamp the request to the possibly adjusted quota */ + request = min(counts.quota, request); + + pr_debug("rtas_msi: request clamped to quota %d\n", request); +out: + of_node_put(pe_dn); + + return request; +} + +static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev) +{ + u32 addr_hi, addr_lo; + + /* + * We should only get in here for IODA1 configs. This is based on the + * fact that we using RTAS for MSIs, we don't have the 32 bit MSI RTAS + * support, and we are in a PCIe Gen2 slot. + */ + dev_info(&pdev->dev, + "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n"); + pci_read_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, &addr_hi); + addr_lo = 0xffff0000 | ((addr_hi >> (48 - 32)) << 4); + pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, addr_lo); + pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, 0); +} + +static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type, + msi_alloc_info_t *arg) +{ + struct pci_dn *pdn; + int quota, rc; + int nvec = nvec_in; + int use_32bit_msi_hack = 0; + + if (type == PCI_CAP_ID_MSIX) + rc = check_req_msix(pdev, nvec); + else + rc = check_req_msi(pdev, nvec); + + if (rc) + return rc; + + quota = msi_quota_for_device(pdev, nvec); + + if (quota && quota < nvec) + return quota; + + /* + * Firmware currently refuse any non power of two allocation + * so we round up if the quota will allow it. + */ + if (type == PCI_CAP_ID_MSIX) { + int m = roundup_pow_of_two(nvec); + quota = msi_quota_for_device(pdev, m); + + if (quota >= m) + nvec = m; + } + + pdn = pci_get_pdn(pdev); + + /* + * Try the new more explicit firmware interface, if that fails fall + * back to the old interface. The old interface is known to never + * return MSI-Xs. + */ +again: + if (type == PCI_CAP_ID_MSI) { + if (pdev->no_64bit_msi) { + rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); + if (rc < 0) { + /* + * We only want to run the 32 bit MSI hack below if + * the max bus speed is Gen2 speed + */ + if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT) + return rc; + + use_32bit_msi_hack = 1; + } + } else + rc = -1; + + if (rc < 0) + rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); + + if (rc < 0) { + pr_debug("rtas_msi: trying the old firmware call.\n"); + rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec); + } + + if (use_32bit_msi_hack && rc > 0) + rtas_hack_32bit_msi_gen2(pdev); + } else + rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); + + if (rc != nvec) { + if (nvec != nvec_in) { + nvec = nvec_in; + goto again; + } + pr_debug("rtas_msi: rtas_change_msi() failed\n"); + return rc; + } + + return 0; +} + +static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int type = pdev->msix_enabled ? PCI_CAP_ID_MSIX : PCI_CAP_ID_MSI; + + return rtas_prepare_msi_irqs(pdev, nvec, type, arg); +} + +/* + * ->msi_free() is called before irq_domain_free_irqs_top() when the + * handler data is still available. Use that to clear the XIVE + * controller data. + */ +static void pseries_msi_ops_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int irq) +{ + if (xive_enabled()) + xive_irq_free_data(irq); +} + +/* + * RTAS can not disable one MSI at a time. It's all or nothing. Do it + * at the end after all IRQs have been freed. + */ +static void pseries_msi_post_free(struct irq_domain *domain, struct device *dev) +{ + if (WARN_ON_ONCE(!dev_is_pci(dev))) + return; + + rtas_disable_msi(to_pci_dev(dev)); +} + +static struct msi_domain_ops pseries_pci_msi_domain_ops = { + .msi_prepare = pseries_msi_ops_prepare, + .msi_free = pseries_msi_ops_msi_free, + .msi_post_free = pseries_msi_post_free, +}; + +static void pseries_msi_shutdown(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_shutdown) + d->chip->irq_shutdown(d); +} + +static void pseries_msi_mask(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void pseries_msi_unmask(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *entry = irq_data_get_msi_desc(data); + + /* + * Do not update the MSIx vector table. It's not strictly necessary + * because the table is initialized by the underlying hypervisor, PowerVM + * or QEMU/KVM. However, if the MSIx vector entry is cleared, any further + * activation will fail. This can happen in some drivers (eg. IPR) which + * deactivate an IRQ used for testing MSI support. + */ + entry->msg = *msg; +} + +static struct irq_chip pseries_pci_msi_irq_chip = { + .name = "pSeries-PCI-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = pseries_msi_mask, + .irq_unmask = pseries_msi_unmask, + .irq_eoi = irq_chip_eoi_parent, + .irq_write_msi_msg = pseries_msi_write_msg, +}; + + +/* + * Set MSI_FLAG_MSIX_CONTIGUOUS as there is no way to express to + * firmware to request a discontiguous or non-zero based range of + * MSI-X entries. Core code will reject such setup attempts. + */ +static struct msi_domain_info pseries_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX | + MSI_FLAG_MSIX_CONTIGUOUS), + .ops = &pseries_pci_msi_domain_ops, + .chip = &pseries_pci_msi_irq_chip, +}; + +static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); +} + +static struct irq_chip pseries_msi_irq_chip = { + .name = "pSeries-MSI", + .irq_shutdown = pseries_msi_shutdown, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = irq_chip_set_affinity_parent, + .irq_compose_msi_msg = pseries_msi_compose_msg, +}; + +static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_fwspec parent_fwspec; + int ret; + + parent_fwspec.fwnode = domain->parent->fwnode; + parent_fwspec.param_count = 2; + parent_fwspec.param[0] = hwirq; + parent_fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + + ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &parent_fwspec); + if (ret) + return ret; + + return 0; +} + +static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct pci_controller *phb = domain->host_data; + msi_alloc_info_t *info = arg; + struct msi_desc *desc = info->desc; + struct pci_dev *pdev = msi_desc_to_pci_dev(desc); + int hwirq; + int i, ret; + + hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index); + if (hwirq < 0) { + dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq); + return hwirq; + } + + dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__, + phb->dn, virq, hwirq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i); + if (ret) + goto out; + + irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, + &pseries_msi_irq_chip, domain->host_data); + } + + return 0; + +out: + /* TODO: handle RTAS cleanup in ->msi_finish() ? */ + irq_domain_free_irqs_parent(domain, virq, i - 1); + return ret; +} + +static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct pci_controller *phb = irq_data_get_irq_chip_data(d); + + pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs); + + /* XIVE domain data is cleared through ->msi_free() */ +} + +static const struct irq_domain_ops pseries_irq_domain_ops = { + .alloc = pseries_irq_domain_alloc, + .free = pseries_irq_domain_free, +}; + +static int __pseries_msi_allocate_domains(struct pci_controller *phb, + unsigned int count) +{ + struct irq_domain *parent = irq_get_default_host(); + + phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI", + phb->global_number); + if (!phb->fwnode) + return -ENOMEM; + + phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count, + phb->fwnode, + &pseries_irq_domain_ops, phb); + if (!phb->dev_domain) { + pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + irq_domain_free_fwnode(phb->fwnode); + return -ENOMEM; + } + + phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn), + &pseries_msi_domain_info, + phb->dev_domain); + if (!phb->msi_domain) { + pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + irq_domain_free_fwnode(phb->fwnode); + irq_domain_remove(phb->dev_domain); + return -ENOMEM; + } + + return 0; +} + +int pseries_msi_allocate_domains(struct pci_controller *phb) +{ + int count; + + if (!__find_pe_total_msi(phb->dn, &count)) { + pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n", + phb->dn, phb->global_number); + return -ENOSPC; + } + + return __pseries_msi_allocate_domains(phb, count); +} + +void pseries_msi_free_domains(struct pci_controller *phb) +{ + if (phb->msi_domain) + irq_domain_remove(phb->msi_domain); + if (phb->dev_domain) + irq_domain_remove(phb->dev_domain); + if (phb->fwnode) + irq_domain_free_fwnode(phb->fwnode); +} + +static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) +{ + /* No LSI -> leave MSIs (if any) configured */ + if (!pdev->irq) { + dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n"); + return; + } + + /* No MSI -> MSIs can't have been assigned by fw, leave LSI */ + if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) { + dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n"); + return; + } + + dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n"); + rtas_disable_msi(pdev); +} + +static int rtas_msi_init(void) +{ + query_token = rtas_function_token(RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER); + change_token = rtas_function_token(RTAS_FN_IBM_CHANGE_MSI); + + if ((query_token == RTAS_UNKNOWN_SERVICE) || + (change_token == RTAS_UNKNOWN_SERVICE)) { + pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n"); + return -1; + } + + pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n"); + + WARN_ON(ppc_md.pci_irq_fixup); + ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup; + + return 0; +} +machine_arch_initcall(pseries, rtas_msi_init); diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c new file mode 100644 index 0000000000..8130c37962 --- /dev/null +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * c 2001 PPC 64 Team, IBM Corp + * + * /dev/nvram driver for PPC64 + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Max bytes to read/write in one go */ +#define NVRW_CNT 0x20 + +static unsigned int nvram_size; +static int nvram_fetch, nvram_store; +static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ +static DEFINE_SPINLOCK(nvram_lock); + +/* See clobbering_unread_rtas_event() */ +#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */ +static time64_t last_unread_rtas_event; /* timestamp */ + +#ifdef CONFIG_PSTORE +time64_t last_rtas_event; +#endif + +static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + char *p = buf; + + + if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + memcpy(p, nvram_buf, len); + + p += len; + i += len; + } + + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned int i; + unsigned long len; + int done; + unsigned long flags; + const char *p = buf; + + if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE) + return -ENODEV; + + if (*index >= nvram_size) + return 0; + + i = *index; + if (i + count > nvram_size) + count = nvram_size - i; + + spin_lock_irqsave(&nvram_lock, flags); + + for (; count != 0; count -= len) { + len = count; + if (len > NVRW_CNT) + len = NVRW_CNT; + + memcpy(nvram_buf, p, len); + + if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf), + len) != 0) || len != done) { + spin_unlock_irqrestore(&nvram_lock, flags); + return -EIO; + } + + p += len; + i += len; + } + spin_unlock_irqrestore(&nvram_lock, flags); + + *index = i; + return p - buf; +} + +static ssize_t pSeries_nvram_get_size(void) +{ + return nvram_size ? nvram_size : -ENODEV; +} + +/* nvram_write_error_log + * + * We need to buffer the error logs into nvram to ensure that we have + * the failure information to decode. + */ +int nvram_write_error_log(char * buff, int length, + unsigned int err_type, unsigned int error_log_cnt) +{ + int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); + if (!rc) { + last_unread_rtas_event = ktime_get_real_seconds(); +#ifdef CONFIG_PSTORE + last_rtas_event = ktime_get_real_seconds(); +#endif + } + + return rc; +} + +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char *buff, int length, + unsigned int *err_type, unsigned int *error_log_cnt) +{ + return nvram_read_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); +} + +/* This doesn't actually zero anything, but it sets the event_logged + * word to tell that this event is safely in syslog. + */ +int nvram_clear_error_log(void) +{ + loff_t tmp_index; + int clear_word = ERR_FLAG_ALREADY_LOGGED; + int rc; + + if (rtas_log_partition.index == -1) + return -1; + + tmp_index = rtas_log_partition.index; + + rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); + if (rc <= 0) { + printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); + return rc; + } + last_unread_rtas_event = 0; + + return 0; +} + +/* + * Are we using the ibm,rtas-log for oops/panic reports? And if so, + * would logging this oops/panic overwrite an RTAS event that rtas_errd + * hasn't had a chance to read and process? Return 1 if so, else 0. + * + * We assume that if rtas_errd hasn't read the RTAS event in + * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. + */ +int clobbering_unread_rtas_event(void) +{ + return (oops_log_partition.index == rtas_log_partition.index + && last_unread_rtas_event + && ktime_get_real_seconds() - last_unread_rtas_event <= + NVRAM_RTAS_READ_TIMEOUT); +} + +static int __init pseries_nvram_init_log_partitions(void) +{ + int rc; + + /* Scan nvram for partitions */ + nvram_scan_partitions(); + + rc = nvram_init_os_partition(&rtas_log_partition); + nvram_init_oops_partition(rc == 0); + return 0; +} +machine_arch_initcall(pseries, pseries_nvram_init_log_partitions); + +int __init pSeries_nvram_init(void) +{ + struct device_node *nvram; + const __be32 *nbytes_p; + unsigned int proplen; + + nvram = of_find_node_by_type(NULL, "nvram"); + if (nvram == NULL) + return -ENODEV; + + nbytes_p = of_get_property(nvram, "#bytes", &proplen); + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); + return -EIO; + } + + nvram_size = be32_to_cpup(nbytes_p); + + nvram_fetch = rtas_function_token(RTAS_FN_NVRAM_FETCH); + nvram_store = rtas_function_token(RTAS_FN_NVRAM_STORE); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size); + of_node_put(nvram); + + ppc_md.nvram_read = pSeries_nvram_read; + ppc_md.nvram_write = pSeries_nvram_write; + ppc_md.nvram_size = pSeries_nvram_get_size; + + return 0; +} + diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c new file mode 100644 index 0000000000..23241c71ef --- /dev/null +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +#include "of_helpers.h" + +/** + * pseries_of_derive_parent - basically like dirname(1) + * @path: the full_name of a node to be added to the tree + * + * Returns the node which should be the parent of the node + * described by path. E.g., for path = "/foo/bar", returns + * the node with full_name = "/foo". + */ +struct device_node *pseries_of_derive_parent(const char *path) +{ + struct device_node *parent; + char *parent_path = "/"; + const char *tail; + + /* We do not want the trailing '/' character */ + tail = kbasename(path) - 1; + + /* reject if path is "/" */ + if (!strcmp(path, "/")) + return ERR_PTR(-EINVAL); + + if (tail > path) { + parent_path = kstrndup(path, tail - path, GFP_KERNEL); + if (!parent_path) + return ERR_PTR(-ENOMEM); + } + parent = of_find_node_by_path(parent_path); + if (strcmp(parent_path, "/")) + kfree(parent_path); + return parent ? parent : ERR_PTR(-EINVAL); +} + + +/* Helper Routines to convert between drc_index to cpu numbers */ + +int of_read_drc_info_cell(struct property **prop, const __be32 **curval, + struct of_drc_info *data) +{ + const char *p = (char *)(*curval); + const __be32 *p2; + + if (!data) + return -EINVAL; + + /* Get drc-type:encode-string */ + data->drc_type = (char *)p; + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-name-prefix:encode-string */ + data->drc_name_prefix = (char *)p; + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-index-start:encode-int */ + p2 = (const __be32 *)p; + data->drc_index_start = be32_to_cpu(*p2); + + /* Get drc-name-suffix-start:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start); + if (!p2) + return -EINVAL; + + /* Get number-sequential-elements:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems); + if (!p2) + return -EINVAL; + + /* Get sequential-increment:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc); + if (!p2) + return -EINVAL; + + /* Get drc-power-domain:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain); + if (!p2) + return -EINVAL; + + /* Should now know end of current entry */ + (*curval) = (void *)(++p2); + data->last_drc_index = data->drc_index_start + + ((data->num_sequential_elems - 1) * data->sequential_inc); + + return 0; +} +EXPORT_SYMBOL(of_read_drc_info_cell); diff --git a/arch/powerpc/platforms/pseries/of_helpers.h b/arch/powerpc/platforms/pseries/of_helpers.h new file mode 100644 index 0000000000..decad6553d --- /dev/null +++ b/arch/powerpc/platforms/pseries/of_helpers.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PSERIES_OF_HELPERS_H +#define _PSERIES_OF_HELPERS_H + +#include + +struct device_node *pseries_of_derive_parent(const char *path); + +#endif /* _PSERIES_OF_HELPERS_H */ diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c new file mode 100644 index 0000000000..fedc61599e --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-sysparm.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-sysparm: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +struct papr_sysparm_buf *papr_sysparm_buf_alloc(void) +{ + struct papr_sysparm_buf *buf = kzalloc(sizeof(*buf), GFP_KERNEL); + + return buf; +} + +void papr_sysparm_buf_free(struct papr_sysparm_buf *buf) +{ + kfree(buf); +} + +/** + * papr_sysparm_get() - Retrieve the value of a PAPR system parameter. + * @param: PAPR system parameter token as described in + * 7.3.16 "System Parameters Option". + * @buf: A &struct papr_sysparm_buf as returned from papr_sysparm_buf_alloc(). + * + * Place the result of querying the specified parameter, if available, + * in @buf. The result includes a be16 length header followed by the + * value, which may be a string or binary data. See &struct papr_sysparm_buf. + * + * Since there is at least one parameter (60, OS Service Entitlement + * Status) where the results depend on the incoming contents of the + * work area, the caller-supplied buffer is copied unmodified into the + * work area before calling ibm,get-system-parameter. + * + * A defined parameter may not be implemented on a given system, and + * some implemented parameters may not be available to all partitions + * on a system. A parameter's disposition may change at any time due + * to system configuration changes or partition migration. + * + * Context: This function may sleep. + * + * Return: 0 on success, -errno otherwise. @buf is unmodified on error. + */ + +int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf) +{ + const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER); + struct rtas_work_area *work_area; + s32 fwrc; + int ret; + + might_sleep(); + + if (WARN_ON(!buf)) + return -EFAULT; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + work_area = rtas_work_area_alloc(sizeof(*buf)); + + memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); + + do { + fwrc = rtas_call(token, 3, 1, NULL, param.token, + rtas_work_area_phys(work_area), + rtas_work_area_size(work_area)); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case 0: + ret = 0; + memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf)); + break; + case -3: /* parameter not implemented */ + ret = -EOPNOTSUPP; + break; + case -9002: /* this partition not authorized to retrieve this parameter */ + ret = -EPERM; + break; + case -9999: /* "parameter error" e.g. the buffer is too small */ + ret = -EINVAL; + break; + default: + pr_err("unexpected ibm,get-system-parameter result %d\n", fwrc); + fallthrough; + case -1: /* Hardware/platform error */ + ret = -EIO; + break; + } + + rtas_work_area_free(work_area); + + return ret; +} + +int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf) +{ + const s32 token = rtas_function_token(RTAS_FN_IBM_SET_SYSTEM_PARAMETER); + struct rtas_work_area *work_area; + s32 fwrc; + int ret; + + might_sleep(); + + if (WARN_ON(!buf)) + return -EFAULT; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + work_area = rtas_work_area_alloc(sizeof(*buf)); + + memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf)); + + do { + fwrc = rtas_call(token, 2, 1, NULL, param.token, + rtas_work_area_phys(work_area)); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case 0: + ret = 0; + break; + case -3: /* parameter not supported */ + ret = -EOPNOTSUPP; + break; + case -9002: /* this partition not authorized to modify this parameter */ + ret = -EPERM; + break; + case -9999: /* "parameter error" e.g. invalid input data */ + ret = -EINVAL; + break; + default: + pr_err("unexpected ibm,set-system-parameter result %d\n", fwrc); + fallthrough; + case -1: /* Hardware/platform error */ + ret = -EIO; + break; + } + + rtas_work_area_free(work_area); + + return ret; +} diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c new file mode 100644 index 0000000000..526c621b09 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Platform energy and frequency attributes driver + * + * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a + * directory structure containing files in keyword - value pairs that specify + * energy and frequency configuration of the system. + * + * The format of exposing the sysfs information is as follows: + * /sys/firmware/papr/energy_scale_info/ + * |-- / + * |-- desc + * |-- value + * |-- value_desc (if exists) + * |-- / + * |-- desc + * |-- value + * |-- value_desc (if exists) + * + * Copyright 2022 IBM Corp. + */ + +#include +#include +#include + +#include "pseries.h" + +/* + * Flag attributes to fetch either all or one attribute from the HCALL + * flag = BE(0) => fetch all attributes with firstAttributeId = 0 + * flag = BE(1) => fetch a single attribute with firstAttributeId = id + */ +#define ESI_FLAGS_ALL 0 +#define ESI_FLAGS_SINGLE (1ull << 63) + +#define KOBJ_MAX_ATTRS 3 + +#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr) +#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute) +#define CURR_MAX_ESI_ATTRS 8 + +struct energy_scale_attribute { + __be64 id; + __be64 val; + u8 desc[64]; + u8 value_desc[64]; +} __packed; + +struct h_energy_scale_info_hdr { + __be64 num_attrs; + __be64 array_offset; + u8 data_header_version; +} __packed; + +struct papr_attr { + u64 id; + struct kobj_attribute kobj_attr; +}; + +struct papr_group { + struct attribute_group pg; + struct papr_attr pgattrs[KOBJ_MAX_ATTRS]; +}; + +static struct papr_group *papr_groups; +/* /sys/firmware/papr */ +static struct kobject *papr_kobj; +/* /sys/firmware/papr/energy_scale_info */ +static struct kobject *esi_kobj; + +/* + * Energy modes can change dynamically hence making a new hcall each time the + * information needs to be retrieved + */ +static int papr_get_attr(u64 id, struct energy_scale_attribute *esi) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct energy_scale_attribute *curr_esi; + struct h_energy_scale_info_hdr *hdr; + char *buf; + + buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + +retry: + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE, + id, virt_to_phys(buf), + esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); + if (temp_buf) + buf = temp_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO"); + ret = -EIO; + goto out_buf; + } + + hdr = (struct h_energy_scale_info_hdr *) buf; + curr_esi = (struct energy_scale_attribute *) + (buf + be64_to_cpu(hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs) + * sizeof(struct energy_scale_attribute))) { + ret = -EIO; + goto out_buf; + } + + *esi = *curr_esi; + +out_buf: + kfree(buf); + + return ret; +} + +/* + * Extract and export the description of the energy scale attributes + */ +static ssize_t desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.desc); +} + +/* + * Extract and export the numeric value of the energy scale attributes + */ +static ssize_t val_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%llu\n", be64_to_cpu(esi.val)); +} + +/* + * Extract and export the value description in string format of the energy + * scale attributes + */ +static ssize_t val_desc_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, + char *buf) +{ + struct papr_attr *pattr = container_of(kobj_attr, struct papr_attr, + kobj_attr); + struct energy_scale_attribute esi; + int ret; + + ret = papr_get_attr(pattr->id, &esi); + if (ret) + return ret; + + return sysfs_emit(buf, "%s\n", esi.value_desc); +} + +static struct papr_ops_info { + const char *attr_name; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr, + char *buf); +} ops_info[KOBJ_MAX_ATTRS] = { + { "desc", desc_show }, + { "value", val_show }, + { "value_desc", val_desc_show }, +}; + +static void add_attr(u64 id, int index, struct papr_attr *attr) +{ + attr->id = id; + sysfs_attr_init(&attr->kobj_attr.attr); + attr->kobj_attr.attr.name = ops_info[index].attr_name; + attr->kobj_attr.attr.mode = 0444; + attr->kobj_attr.show = ops_info[index].show; +} + +static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc) +{ + int i; + + for (i = 0; i < KOBJ_MAX_ATTRS; i++) { + if (!strcmp(ops_info[i].attr_name, "value_desc") && + !show_val_desc) { + continue; + } + add_attr(id, i, &pg->pgattrs[i]); + pg->pg.attrs[i] = &pg->pgattrs[i].kobj_attr.attr; + } + + return sysfs_create_group(esi_kobj, &pg->pg); +} + + +static int __init papr_init(void) +{ + int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE); + int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS; + struct h_energy_scale_info_hdr *esi_hdr; + struct energy_scale_attribute *esi_attrs; + uint64_t num_attrs; + char *esi_buf; + + if (!firmware_has_feature(FW_FEATURE_LPAR) || + !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) { + return -ENXIO; + } + + esi_buf = kmalloc(esi_buf_size, GFP_KERNEL); + if (esi_buf == NULL) + return -ENOMEM; + /* + * hcall( + * uint64 H_GET_ENERGY_SCALE_INFO, // Get energy scale info + * uint64 flags, // Per the flag request + * uint64 firstAttributeId, // The attribute id + * uint64 bufferAddress, // Guest physical address of the output buffer + * uint64 bufferSize); // The size in bytes of the output buffer + */ +retry: + + ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0, + virt_to_phys(esi_buf), esi_buf_size); + + /* + * If the hcall fails with not enough memory for either the + * header or data, attempt to allocate more + */ + if (ret == H_PARTIAL || ret == H_P4) { + char *temp_esi_buf; + + max_esi_attrs += 4; + esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); + + temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL); + if (temp_esi_buf) + esi_buf = temp_esi_buf; + else + return -ENOMEM; + + goto retry; + } + + if (ret != H_SUCCESS) { + pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret); + goto out_free_esi_buf; + } + + esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf; + num_attrs = be64_to_cpu(esi_hdr->num_attrs); + esi_attrs = (struct energy_scale_attribute *) + (esi_buf + be64_to_cpu(esi_hdr->array_offset)); + + if (esi_buf_size < + be64_to_cpu(esi_hdr->array_offset) + + (num_attrs * sizeof(struct energy_scale_attribute))) { + goto out_free_esi_buf; + } + + papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL); + if (!papr_groups) + goto out_free_esi_buf; + + papr_kobj = kobject_create_and_add("papr", firmware_kobj); + if (!papr_kobj) { + pr_warn("kobject_create_and_add papr failed\n"); + goto out_papr_groups; + } + + esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj); + if (!esi_kobj) { + pr_warn("kobject_create_and_add energy_scale_info failed\n"); + goto out_kobj; + } + + /* Allocate the groups before registering */ + for (idx = 0; idx < num_attrs; idx++) { + papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1, + sizeof(*papr_groups[idx].pg.attrs), + GFP_KERNEL); + if (!papr_groups[idx].pg.attrs) + goto out_pgattrs; + + papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld", + be64_to_cpu(esi_attrs[idx].id)); + if (papr_groups[idx].pg.name == NULL) + goto out_pgattrs; + } + + for (idx = 0; idx < num_attrs; idx++) { + bool show_val_desc = true; + + /* Do not add the value desc attr if it does not exist */ + if (strnlen(esi_attrs[idx].value_desc, + sizeof(esi_attrs[idx].value_desc)) == 0) + show_val_desc = false; + + if (add_attr_group(be64_to_cpu(esi_attrs[idx].id), + &papr_groups[idx], + show_val_desc)) { + pr_warn("Failed to create papr attribute group %s\n", + papr_groups[idx].pg.name); + idx = num_attrs; + goto out_pgattrs; + } + } + + kfree(esi_buf); + return 0; +out_pgattrs: + for (i = 0; i < idx ; i++) { + kfree(papr_groups[i].pg.attrs); + kfree(papr_groups[i].pg.name); + } + kobject_put(esi_kobj); +out_kobj: + kobject_put(papr_kobj); +out_papr_groups: + kfree(papr_groups); +out_free_esi_buf: + kfree(esi_buf); + + return -ENOMEM; +} + +machine_device_initcall(pseries, papr_init); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c new file mode 100644 index 0000000000..1a53e048ce --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -0,0 +1,1581 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "papr-scm: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define BIND_ANY_ADDR (~0ul) + +#define PAPR_SCM_DIMM_CMD_MASK \ + ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ + (1ul << ND_CMD_GET_CONFIG_DATA) | \ + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) + +/* DIMM health bitmap indicators */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are persisted from previous IPL */ +#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +/* SCM device memory life remaining is critically low */ +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) +/* SCM device is unable to persist memory contents in certain conditions */ +#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) +/* SCM device is encrypted */ +#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) +/* SCM device has been scrubbed and locked */ +#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS) +#define PAPR_SCM_PERF_STATS_VERSION 0x1 + +/* Struct holding a single performance metric */ +struct papr_scm_perf_stat { + u8 stat_id[8]; + __be64 stat_val; +} __packed; + +/* Struct exchanged between kernel and PHYP for fetching drc perf stats */ +struct papr_scm_perf_stats { + u8 eye_catcher[8]; + /* Should be PAPR_SCM_PERF_STATS_VERSION */ + __be32 stats_version; + /* Number of stats following */ + __be32 num_statistics; + /* zero or more performance matrics */ + struct papr_scm_perf_stat scm_statistic[]; +} __packed; + +/* private struct associated with each region */ +struct papr_scm_priv { + struct platform_device *pdev; + struct device_node *dn; + uint32_t drc_index; + uint64_t blocks; + uint64_t block_size; + int metadata_size; + bool is_volatile; + bool hcall_flush_required; + + uint64_t bound_addr; + + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; + struct nvdimm *nvdimm; + struct resource res; + struct nd_region *region; + struct nd_interleave_set nd_set; + struct list_head region_list; + + /* Protect dimm health data from concurrent read/writes */ + struct mutex health_mutex; + + /* Last time the health information of the dimm was updated */ + unsigned long lasthealth_jiffies; + + /* Health information for the dimm */ + u64 health_bitmap; + + /* Holds the last known dirty shutdown counter value */ + u64 dirty_shutdown_counter; + + /* length of the stat buffer as expected by phyp */ + size_t stat_buffer_len; + + /* The bits which needs to be overridden */ + u64 health_bitmap_inject_mask; +}; + +static int papr_scm_pmem_flush(struct nd_region *nd_region, + struct bio *bio __maybe_unused) +{ + struct papr_scm_priv *p = nd_region_provider_data(nd_region); + unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0; + long rc; + + dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index); + + do { + rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token); + token = ret_buf[0]; + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + } while (rc == H_BUSY); + + if (rc) { + dev_err(&p->pdev->dev, "flush error: %ld", rc); + rc = -EIO; + } else { + dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index); + } + + return rc; +} + +static LIST_HEAD(papr_nd_regions); +static DEFINE_MUTEX(papr_ndr_lock); + +static int drc_pmem_bind(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + uint64_t saved = 0; + uint64_t token; + int64_t rc; + + /* + * When the hypervisor cannot map all the requested memory in a single + * hcall it returns H_BUSY and we call again with the token until + * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS + * leave the system in an undefined state, so we wait. + */ + token = 0; + + do { + rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, + p->blocks, BIND_ANY_ADDR, token); + token = ret[0]; + if (!saved) + saved = ret[1]; + cond_resched(); + } while (rc == H_BUSY); + + if (rc) + return rc; + + p->bound_addr = saved; + dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", + p->drc_index, (unsigned long)saved); + return rc; +} + +static void drc_pmem_unbind(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + uint64_t token = 0; + int64_t rc; + + dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index); + + /* NB: unbind has the same retry requirements as drc_pmem_bind() */ + do { + + /* Unbind of all SCM resources associated with drcIndex */ + rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC, + p->drc_index, token); + token = ret[0]; + + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + } while (rc == H_BUSY); + + if (rc) + dev_err(&p->pdev->dev, "unbind error: %lld\n", rc); + else + dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n", + p->drc_index); + + return; +} + +static int drc_pmem_query_n_bind(struct papr_scm_priv *p) +{ + unsigned long start_addr; + unsigned long end_addr; + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + int64_t rc; + + + rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret, + p->drc_index, 0); + if (rc) + goto err_out; + start_addr = ret[0]; + + /* Make sure the full region is bound. */ + rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret, + p->drc_index, p->blocks - 1); + if (rc) + goto err_out; + end_addr = ret[0]; + + if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size)) + goto err_out; + + p->bound_addr = start_addr; + dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr); + return rc; + +err_out: + dev_info(&p->pdev->dev, + "Failed to query, trying an unbind followed by bind"); + drc_pmem_unbind(p); + return drc_pmem_bind(p); +} + +/* + * Query the Dimm performance stats from PHYP and copy them (if returned) to + * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast + * (num_stats + header) bytes. + * - If buff_stats == NULL the return value is the size in bytes of the buffer + * needed to hold all supported performance-statistics. + * - If buff_stats != NULL and num_stats == 0 then we copy all known + * performance-statistics to 'buff_stat' and expect to be large enough to + * hold them. + * - if buff_stats != NULL and num_stats > 0 then copy the requested + * performance-statistics to buff_stats. + */ +static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, + struct papr_scm_perf_stats *buff_stats, + unsigned int num_stats) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + size_t size; + s64 rc; + + /* Setup the out buffer */ + if (buff_stats) { + memcpy(buff_stats->eye_catcher, + PAPR_SCM_PERF_STATS_EYECATCHER, 8); + buff_stats->stats_version = + cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); + buff_stats->num_statistics = + cpu_to_be32(num_stats); + + /* + * Calculate the buffer size based on num-stats provided + * or use the prefetched max buffer length + */ + if (num_stats) + /* Calculate size from the num_stats */ + size = sizeof(struct papr_scm_perf_stats) + + num_stats * sizeof(struct papr_scm_perf_stat); + else + size = p->stat_buffer_len; + } else { + /* In case of no out buffer ignore the size */ + size = 0; + } + + /* Do the HCALL asking PHYP for info */ + rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, + buff_stats ? virt_to_phys(buff_stats) : 0, + size); + + /* Check if the error was due to an unknown stat-id */ + if (rc == H_PARTIAL) { + dev_err(&p->pdev->dev, + "Unknown performance stats, Err:0x%016lX\n", ret[0]); + return -ENOENT; + } else if (rc == H_AUTHORITY) { + dev_info(&p->pdev->dev, + "Permission denied while accessing performance stats"); + return -EPERM; + } else if (rc == H_UNSUPPORTED) { + dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); + return -EOPNOTSUPP; + } else if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query performance stats, Err:%lld\n", rc); + return -EIO; + + } else if (!size) { + /* Handle case where stat buffer size was requested */ + dev_dbg(&p->pdev->dev, + "Performance stats size %ld\n", ret[0]); + return ret[0]; + } + + /* Successfully fetched the requested stats from phyp */ + dev_dbg(&p->pdev->dev, + "Performance stats returned %d stats\n", + be32_to_cpu(buff_stats->num_statistics)); + return 0; +} + +#ifdef CONFIG_PERF_EVENTS +#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) + +static const char * const nvdimm_events_map[] = { + [1] = "CtlResCt", + [2] = "CtlResTm", + [3] = "PonSecs ", + [4] = "MemLife ", + [5] = "CritRscU", + [6] = "HostLCnt", + [7] = "HostSCnt", + [8] = "HostSDur", + [9] = "HostLDur", + [10] = "MedRCnt ", + [11] = "MedWCnt ", + [12] = "MedRDur ", + [13] = "MedWDur ", + [14] = "CchRHCnt", + [15] = "CchWHCnt", + [16] = "FastWCnt", +}; + +static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) +{ + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + struct papr_scm_priv *p = dev_get_drvdata(dev); + int rc, size; + + /* Invalid eventcode */ + if (event->attr.config == 0 || event->attr.config >= ARRAY_SIZE(nvdimm_events_map)) + return -EINVAL; + + /* Allocate request buffer enough to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + + sizeof(struct papr_scm_perf_stat); + + if (!p) + return -EINVAL; + + stats = kzalloc(size, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + stat = &stats->scm_statistic[0]; + memcpy(&stat->stat_id, + nvdimm_events_map[event->attr.config], + sizeof(stat->stat_id)); + stat->stat_val = 0; + + rc = drc_pmem_query_stats(p, stats, 1); + if (rc < 0) { + kfree(stats); + return rc; + } + + *count = be64_to_cpu(stat->stat_val); + kfree(stats); + return 0; +} + +static int papr_scm_pmu_event_init(struct perf_event *event) +{ + struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); + struct papr_scm_priv *p; + + if (!nd_pmu) + return -EINVAL; + + /* test the event attr type for PMU enumeration */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* it does not support event sampling mode */ + if (is_sampling_event(event)) + return -EOPNOTSUPP; + + /* no branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + p = (struct papr_scm_priv *)nd_pmu->dev->driver_data; + if (!p) + return -EINVAL; + + /* Invalid eventcode */ + if (event->attr.config == 0 || event->attr.config > 16) + return -EINVAL; + + return 0; +} + +static int papr_scm_pmu_add(struct perf_event *event, int flags) +{ + u64 count; + int rc; + struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); + + if (!nd_pmu) + return -EINVAL; + + if (flags & PERF_EF_START) { + rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count); + if (rc) + return rc; + + local64_set(&event->hw.prev_count, count); + } + + return 0; +} + +static void papr_scm_pmu_read(struct perf_event *event) +{ + u64 prev, now; + int rc; + struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); + + if (!nd_pmu) + return; + + rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now); + if (rc) + return; + + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); +} + +static void papr_scm_pmu_del(struct perf_event *event, int flags) +{ + papr_scm_pmu_read(event); +} + +static void papr_scm_pmu_register(struct papr_scm_priv *p) +{ + struct nvdimm_pmu *nd_pmu; + int rc, nodeid; + + nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL); + if (!nd_pmu) { + rc = -ENOMEM; + goto pmu_err_print; + } + + if (!p->stat_buffer_len) { + rc = -ENOENT; + goto pmu_check_events_err; + } + + nd_pmu->pmu.task_ctx_nr = perf_invalid_context; + nd_pmu->pmu.name = nvdimm_name(p->nvdimm); + nd_pmu->pmu.event_init = papr_scm_pmu_event_init; + nd_pmu->pmu.read = papr_scm_pmu_read; + nd_pmu->pmu.add = papr_scm_pmu_add; + nd_pmu->pmu.del = papr_scm_pmu_del; + + nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT | + PERF_PMU_CAP_NO_EXCLUDE; + + /*updating the cpumask variable */ + nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev)); + nd_pmu->arch_cpumask = *cpumask_of_node(nodeid); + + rc = register_nvdimm_pmu(nd_pmu, p->pdev); + if (rc) + goto pmu_check_events_err; + + /* + * Set archdata.priv value to nvdimm_pmu structure, to handle the + * unregistering of pmu device. + */ + p->pdev->archdata.priv = nd_pmu; + return; + +pmu_check_events_err: + kfree(nd_pmu); +pmu_err_print: + dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); +} + +#else +static void papr_scm_pmu_register(struct papr_scm_priv *p) { } +#endif + +/* + * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the + * health information. + */ +static int __drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + u64 bitmap = 0; + long rc; + + /* issue the hcall */ + rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); + if (rc == H_SUCCESS) + bitmap = ret[0] & ret[1]; + else if (rc == H_FUNCTION) + dev_info_once(&p->pdev->dev, + "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); + else { + + dev_err(&p->pdev->dev, + "Failed to query health information, Err:%ld\n", rc); + return -ENXIO; + } + + p->lasthealth_jiffies = jiffies; + /* Allow injecting specific health bits via inject mask. */ + if (p->health_bitmap_inject_mask) + bitmap = (bitmap & ~p->health_bitmap_inject_mask) | + p->health_bitmap_inject_mask; + WRITE_ONCE(p->health_bitmap, bitmap); + dev_dbg(&p->pdev->dev, + "Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n", + ret[0], ret[1]); + + return 0; +} + +/* Min interval in seconds for assuming stable dimm health */ +#define MIN_HEALTH_QUERY_INTERVAL 60 + +/* Query cached health info and if needed call drc_pmem_query_health */ +static int drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long cache_timeout; + int rc; + + /* Protect concurrent modifications to papr_scm_priv */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Jiffies offset for which the health data is assumed to be same */ + cache_timeout = p->lasthealth_jiffies + + msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); + + /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ + if (time_after(jiffies, cache_timeout)) + rc = __drc_pmem_query_health(p); + else + /* Assume cached health data is valid */ + rc = 0; + + mutex_unlock(&p->health_mutex); + return rc; +} + +static int papr_scm_meta_get(struct papr_scm_priv *p, + struct nd_cmd_get_config_data_hdr *hdr) +{ + unsigned long data[PLPAR_HCALL_BUFSIZE]; + unsigned long offset, data_offset; + int len, read; + int64_t ret; + + if ((hdr->in_offset + hdr->in_length) > p->metadata_size) + return -EINVAL; + + for (len = hdr->in_length; len; len -= read) { + + data_offset = hdr->in_length - len; + offset = hdr->in_offset + data_offset; + + if (len >= 8) + read = 8; + else if (len >= 4) + read = 4; + else if (len >= 2) + read = 2; + else + read = 1; + + ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, + offset, read); + + if (ret == H_PARAMETER) /* bad DRC index */ + return -ENODEV; + if (ret) + return -EINVAL; /* other invalid parameter */ + + switch (read) { + case 8: + *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); + break; + case 4: + *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff); + break; + + case 2: + *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); + break; + + case 1: + *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); + break; + } + } + return 0; +} + +static int papr_scm_meta_set(struct papr_scm_priv *p, + struct nd_cmd_set_config_hdr *hdr) +{ + unsigned long offset, data_offset; + int len, wrote; + unsigned long data; + __be64 data_be; + int64_t ret; + + if ((hdr->in_offset + hdr->in_length) > p->metadata_size) + return -EINVAL; + + for (len = hdr->in_length; len; len -= wrote) { + + data_offset = hdr->in_length - len; + offset = hdr->in_offset + data_offset; + + if (len >= 8) { + data = *(uint64_t *)(hdr->in_buf + data_offset); + data_be = cpu_to_be64(data); + wrote = 8; + } else if (len >= 4) { + data = *(uint32_t *)(hdr->in_buf + data_offset); + data &= 0xffffffff; + data_be = cpu_to_be32(data); + wrote = 4; + } else if (len >= 2) { + data = *(uint16_t *)(hdr->in_buf + data_offset); + data &= 0xffff; + data_be = cpu_to_be16(data); + wrote = 2; + } else { + data_be = *(uint8_t *)(hdr->in_buf + data_offset); + data_be &= 0xff; + wrote = 1; + } + + ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, + offset, data_be, wrote); + if (ret == H_PARAMETER) /* bad DRC index */ + return -ENODEV; + if (ret) + return -EINVAL; /* other invalid parameter */ + } + + return 0; +} + +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. + */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc, size; + u64 statval; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + + /* Silently fail if fetching performance metrics isn't supported */ + if (!p->stat_buffer_len) + return 0; + + /* Allocate request buffer enough to hold single performance stat */ + size = sizeof(struct papr_scm_perf_stats) + + sizeof(struct papr_scm_perf_stat); + + stats = kzalloc(size, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + stat = &stats->scm_statistic[0]; + memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); + stat->stat_val = 0; + + /* Fetch the fuel gauge and populate it in payload */ + rc = drc_pmem_query_stats(p, stats, 1); + if (rc < 0) { + dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); + goto free_stats; + } + + statval = be64_to_cpu(stat->stat_val); + dev_dbg(&p->pdev->dev, + "Fetched fuel-gauge %llu", statval); + payload->health.extension_flags |= + PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; + payload->health.dimm_fuel_gauge = statval; + + rc = sizeof(struct nd_papr_pdsm_health); + +free_stats: + kfree(stats); + return rc; +} + +/* Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_dsc(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; + payload->health.dimm_dsc = p->dirty_shutdown_counter; + + return sizeof(struct nd_papr_pdsm_health); +} + +/* Fetch the DIMM health info and populate it in provided package. */ +static int papr_pdsm_health(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + + /* Ensure dimm health mutex is taken preventing concurrent access */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + goto out; + + /* Always fetch upto date dimm health data ignoring cached values */ + rc = __drc_pmem_query_health(p); + if (rc) { + mutex_unlock(&p->health_mutex); + goto out; + } + + /* update health struct with various flags derived from health bitmap */ + payload->health = (struct nd_papr_pdsm_health) { + .extension_flags = 0, + .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), + .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), + .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), + .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), + .dimm_health = PAPR_PDSM_DIMM_HEALTHY, + }; + + /* Update field dimm_health based on health_bitmap flags */ + if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) + payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; + + /* struct populated hence can release the mutex now */ + mutex_unlock(&p->health_mutex); + + /* Populate the fuel gauge meter in the payload */ + papr_pdsm_fuel_gauge(p, payload); + /* Populate the dirty-shutdown-counter field */ + papr_pdsm_dsc(p, payload); + + rc = sizeof(struct nd_papr_pdsm_health); + +out: + return rc; +} + +/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_smart_inject(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + u32 supported_flags = 0; + u64 inject_mask = 0, clear_mask = 0; + u64 mask; + + /* Check for individual smart error flags and update inject/clear masks */ + if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { + supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; + if (payload->smart_inject.fatal_enable) + inject_mask |= PAPR_PMEM_HEALTH_FATAL; + else + clear_mask |= PAPR_PMEM_HEALTH_FATAL; + } + + if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { + supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; + if (payload->smart_inject.unsafe_shutdown_enable) + inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + else + clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; + } + + dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", + inject_mask, clear_mask); + + /* Prevent concurrent access to dimm health bitmap related members */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Use inject/clear masks to set health_bitmap_inject_mask */ + mask = READ_ONCE(p->health_bitmap_inject_mask); + mask = (mask & ~clear_mask) | inject_mask; + WRITE_ONCE(p->health_bitmap_inject_mask, mask); + + /* Invalidate cached health bitmap */ + p->lasthealth_jiffies = 0; + + mutex_unlock(&p->health_mutex); + + /* Return the supported flags back to userspace */ + payload->smart_inject.flags = supported_flags; + + return sizeof(struct nd_papr_pdsm_health); +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. + * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + [PAPR_PDSM_HEALTH] = { + .size_in = 0, + .size_out = sizeof(struct nd_papr_pdsm_health), + .service = papr_pdsm_health, + }, + + [PAPR_PDSM_SMART_INJECT] = { + .size_in = sizeof(struct nd_papr_pdsm_smart_inject), + .size_out = sizeof(struct nd_papr_pdsm_smart_inject), + .service = papr_pdsm_smart_inject, + }, + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. + */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + +static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) +{ + struct nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; + struct papr_scm_priv *p; + int rc; + + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); + return rc; + } + + /* Use a local variable in case cmd_rc pointer is NULL */ + if (!cmd_rc) + cmd_rc = &rc; + + p = nvdimm_provider_data(nvdimm); + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + get_size_hdr = buf; + + get_size_hdr->status = 0; + get_size_hdr->max_xfer = 8; + get_size_hdr->config_size = p->metadata_size; + *cmd_rc = 0; + break; + + case ND_CMD_GET_CONFIG_DATA: + *cmd_rc = papr_scm_meta_get(p, buf); + break; + + case ND_CMD_SET_CONFIG_DATA: + *cmd_rc = papr_scm_meta_set(p, buf); + break; + + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + + default: + dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); + return -EINVAL; + } + + dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); + + return 0; +} + +static ssize_t health_bitmap_inject_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sprintf(buf, "%#llx\n", + READ_ONCE(p->health_bitmap_inject_mask)); +} + +static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); + +static ssize_t perf_stats_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int index; + ssize_t rc; + struct seq_buf s; + struct papr_scm_perf_stat *stat; + struct papr_scm_perf_stats *stats; + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + if (!p->stat_buffer_len) + return -ENOENT; + + /* Allocate the buffer for phyp where stats are written */ + stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); + if (!stats) + return -ENOMEM; + + /* Ask phyp to return all dimm perf stats */ + rc = drc_pmem_query_stats(p, stats, 0); + if (rc) + goto free_stats; + /* + * Go through the returned output buffer and print stats and + * values. Since stat_id is essentially a char string of + * 8 bytes, simply use the string format specifier to print it. + */ + seq_buf_init(&s, buf, PAGE_SIZE); + for (index = 0, stat = stats->scm_statistic; + index < be32_to_cpu(stats->num_statistics); + ++index, ++stat) { + seq_buf_printf(&s, "%.8s = 0x%016llX\n", + stat->stat_id, + be64_to_cpu(stat->stat_val)); + } + +free_stats: + kfree(stats); + return rc ? rc : (ssize_t)seq_buf_used(&s); +} +static DEVICE_ATTR_ADMIN_RO(perf_stats); + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + struct seq_buf s; + u64 health; + int rc; + + rc = drc_pmem_query_health(p); + if (rc) + return rc; + + /* Copy health_bitmap locally, check masks & update out buffer */ + health = READ_ONCE(p->health_bitmap); + + seq_buf_init(&s, buf, PAGE_SIZE); + if (health & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (health & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (health & PAPR_PMEM_ENCRYPTED) + seq_buf_printf(&s, "encrypted "); + + if (health & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) + seq_buf_printf(&s, "scrubbed locked "); + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +DEVICE_ATTR_RO(flags); + +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); +} +DEVICE_ATTR_RO(dirty_shutdown); + +static umode_t papr_nd_attribute_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); + + /* For if perf-stats not available remove perf_stats sysfs */ + if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) + return 0; + + return attr->mode; +} + +/* papr_scm specific dimm attributes */ +static struct attribute *papr_nd_attributes[] = { + &dev_attr_flags.attr, + &dev_attr_perf_stats.attr, + &dev_attr_dirty_shutdown.attr, + &dev_attr_health_bitmap_inject.attr, + NULL, +}; + +static const struct attribute_group papr_nd_attribute_group = { + .name = "papr", + .is_visible = papr_nd_attribute_visible, + .attrs = papr_nd_attributes, +}; + +static const struct attribute_group *papr_nd_attr_groups[] = { + &papr_nd_attribute_group, + NULL, +}; + +static int papr_scm_nvdimm_init(struct papr_scm_priv *p) +{ + struct device *dev = &p->pdev->dev; + struct nd_mapping_desc mapping; + struct nd_region_desc ndr_desc; + unsigned long dimm_flags; + int target_nid, online_nid; + + p->bus_desc.ndctl = papr_scm_ndctl; + p->bus_desc.module = THIS_MODULE; + p->bus_desc.of_node = p->pdev->dev.of_node; + p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); + + /* Set the dimm command family mask to accept PDSMs */ + set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); + + if (!p->bus_desc.provider_name) + return -ENOMEM; + + p->bus = nvdimm_bus_register(NULL, &p->bus_desc); + if (!p->bus) { + dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); + kfree(p->bus_desc.provider_name); + return -ENXIO; + } + + dimm_flags = 0; + set_bit(NDD_LABELING, &dimm_flags); + + /* + * Check if the nvdimm is unarmed. No locking needed as we are still + * initializing. Ignore error encountered if any. + */ + __drc_pmem_query_health(p); + + if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) + set_bit(NDD_UNARMED, &dimm_flags); + + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + if (!p->nvdimm) { + dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); + goto err; + } + + if (nvdimm_bus_check_dimm_count(p->bus, 1)) + goto err; + + /* now add the region */ + + memset(&mapping, 0, sizeof(mapping)); + mapping.nvdimm = p->nvdimm; + mapping.start = 0; + mapping.size = p->blocks * p->block_size; // XXX: potential overflow? + + memset(&ndr_desc, 0, sizeof(ndr_desc)); + target_nid = dev_to_node(&p->pdev->dev); + online_nid = numa_map_to_online_node(target_nid); + ndr_desc.numa_node = online_nid; + ndr_desc.target_node = target_nid; + ndr_desc.res = &p->res; + ndr_desc.of_node = p->dn; + ndr_desc.provider_data = p; + ndr_desc.mapping = &mapping; + ndr_desc.num_mappings = 1; + ndr_desc.nd_set = &p->nd_set; + + if (p->hcall_flush_required) { + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + ndr_desc.flush = papr_scm_pmem_flush; + } + + if (p->is_volatile) + p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); + else { + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); + p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); + } + if (!p->region) { + dev_err(dev, "Error registering region %pR from %pOF\n", + ndr_desc.res, p->dn); + goto err; + } + if (target_nid != online_nid) + dev_info(dev, "Region registered with target node %d and online node %d", + target_nid, online_nid); + + mutex_lock(&papr_ndr_lock); + list_add_tail(&p->region_list, &papr_nd_regions); + mutex_unlock(&papr_ndr_lock); + + return 0; + +err: nvdimm_bus_unregister(p->bus); + kfree(p->bus_desc.provider_name); + return -ENXIO; +} + +static void papr_scm_add_badblock(struct nd_region *region, + struct nvdimm_bus *bus, u64 phys_addr) +{ + u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); + + if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { + pr_err("Bad block registration for 0x%llx failed\n", phys_addr); + return; + } + + pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", + aligned_addr, aligned_addr + L1_CACHE_BYTES); + + nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); +} + +static int handle_mce_ue(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct machine_check_event *evt = data; + struct papr_scm_priv *p; + u64 phys_addr; + bool found = false; + + if (evt->error_type != MCE_ERROR_TYPE_UE) + return NOTIFY_DONE; + + if (list_empty(&papr_nd_regions)) + return NOTIFY_DONE; + + /* + * The physical address obtained here is PAGE_SIZE aligned, so get the + * exact address from the effective address + */ + phys_addr = evt->u.ue_error.physical_address + + (evt->u.ue_error.effective_address & ~PAGE_MASK); + + if (!evt->u.ue_error.physical_address_provided || + !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) + return NOTIFY_DONE; + + /* mce notifier is called from a process context, so mutex is safe */ + mutex_lock(&papr_ndr_lock); + list_for_each_entry(p, &papr_nd_regions, region_list) { + if (phys_addr >= p->res.start && phys_addr <= p->res.end) { + found = true; + break; + } + } + + if (found) + papr_scm_add_badblock(p->region, p->bus, phys_addr); + + mutex_unlock(&papr_ndr_lock); + + return found ? NOTIFY_OK : NOTIFY_DONE; +} + +static struct notifier_block mce_ue_nb = { + .notifier_call = handle_mce_ue +}; + +static int papr_scm_probe(struct platform_device *pdev) +{ + struct device_node *dn = pdev->dev.of_node; + u32 drc_index, metadata_size; + u64 blocks, block_size; + struct papr_scm_priv *p; + u8 uuid_raw[UUID_SIZE]; + const char *uuid_str; + ssize_t stat_size; + uuid_t uuid; + int rc; + + /* check we have all the required DT properties */ + if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { + dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); + return -ENODEV; + } + + if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { + dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); + return -ENODEV; + } + + if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { + dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); + return -ENODEV; + } + + if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { + dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); + return -ENODEV; + } + + /* + * open firmware platform device create won't update the NUMA + * distance table. For PAPR SCM devices we use numa_map_to_online_node() + * to find the nearest online NUMA node and that requires correct + * distance table information. + */ + update_numa_distance(dn); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + /* Initialize the dimm mutex */ + mutex_init(&p->health_mutex); + + /* optional DT properties */ + of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); + + p->dn = dn; + p->drc_index = drc_index; + p->block_size = block_size; + p->blocks = blocks; + p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); + p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); + + if (of_property_read_u64(dn, "ibm,persistence-failed-count", + &p->dirty_shutdown_counter)) + p->dirty_shutdown_counter = 0; + + /* We just need to ensure that set cookies are unique across */ + uuid_parse(uuid_str, &uuid); + + /* + * The cookie1 and cookie2 are not really little endian. + * We store a raw buffer representation of the + * uuid string so that we can compare this with the label + * area cookie irrespective of the endian configuration + * with which the kernel is built. + * + * Historically we stored the cookie in the below format. + * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa + * cookie1 was 0xfd423b0b671b5172 + * cookie2 was 0xaabce8cae35b1d8d + */ + export_uuid(uuid_raw, &uuid); + p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); + p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); + + /* might be zero */ + p->metadata_size = metadata_size; + p->pdev = pdev; + + /* request the hypervisor to bind this region to somewhere in memory */ + rc = drc_pmem_bind(p); + + /* If phyp says drc memory still bound then force unbound and retry */ + if (rc == H_OVERLAP) + rc = drc_pmem_query_n_bind(p); + + if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, "bind err: %d\n", rc); + rc = -ENXIO; + goto err; + } + + /* setup the resource for the newly bound range */ + p->res.start = p->bound_addr; + p->res.end = p->bound_addr + p->blocks * p->block_size - 1; + p->res.name = pdev->name; + p->res.flags = IORESOURCE_MEM; + + /* Try retrieving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } + + rc = papr_scm_nvdimm_init(p); + if (rc) + goto err2; + + platform_set_drvdata(pdev, p); + papr_scm_pmu_register(p); + + return 0; + +err2: drc_pmem_unbind(p); +err: kfree(p); + return rc; +} + +static int papr_scm_remove(struct platform_device *pdev) +{ + struct papr_scm_priv *p = platform_get_drvdata(pdev); + + mutex_lock(&papr_ndr_lock); + list_del(&p->region_list); + mutex_unlock(&papr_ndr_lock); + + nvdimm_bus_unregister(p->bus); + drc_pmem_unbind(p); + + if (pdev->archdata.priv) + unregister_nvdimm_pmu(pdev->archdata.priv); + + pdev->archdata.priv = NULL; + kfree(p->bus_desc.provider_name); + kfree(p); + + return 0; +} + +static const struct of_device_id papr_scm_match[] = { + { .compatible = "ibm,pmemory" }, + { .compatible = "ibm,pmemory-v2" }, + { }, +}; + +static struct platform_driver papr_scm_driver = { + .probe = papr_scm_probe, + .remove = papr_scm_remove, + .driver = { + .name = "papr_scm", + .of_match_table = papr_scm_match, + }, +}; + +static int __init papr_scm_init(void) +{ + int ret; + + ret = platform_driver_register(&papr_scm_driver); + if (!ret) + mce_register_notifier(&mce_ue_nb); + + return ret; +} +module_init(papr_scm_init); + +static void __exit papr_scm_exit(void) +{ + mce_unregister_notifier(&mce_ue_nb); + platform_driver_unregister(&papr_scm_driver); +} +module_exit(papr_scm_exit); + +MODULE_DEVICE_TABLE(of, papr_scm_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c new file mode 100644 index 0000000000..1772ae3d19 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pci.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard , IBM + * + * pSeries specific routines for PCI. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "pseries.h" + +#if 0 +void pcibios_name_device(struct pci_dev *dev) +{ + struct device_node *dn; + + /* + * Add IBM loc code (slot) as a prefix to the device names for service + */ + dn = pci_device_to_OF_node(dev); + if (dn) { + const char *loc_code = of_get_property(dn, "ibm,loc-code", + NULL); + if (loc_code) { + int loc_len = strlen(loc_code); + if (loc_len < sizeof(dev->dev.name)) { + memmove(dev->dev.name+loc_len+1, dev->dev.name, + sizeof(dev->dev.name)-loc_len-1); + memcpy(dev->dev.name, loc_code, loc_len); + dev->dev.name[loc_len] = ' '; + dev->dev.name[sizeof(dev->dev.name)-1] = '\0'; + } + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); +#endif + +#ifdef CONFIG_PCI_IOV +#define MAX_VFS_FOR_MAP_PE 256 +struct pe_map_bar_entry { + __be64 bar; /* Input: Virtual Function BAR */ + __be16 rid; /* Input: Virtual Function Router ID */ + __be16 pe_num; /* Output: Virtual Function PE Number */ + __be32 reserved; /* Reserved Space */ +}; + +static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs, + struct pe_map_bar_entry *vf_pe_array) +{ + struct pci_dn *pdn; + int rc; + unsigned long buid, addr; + int ibm_map_pes = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER); + + if (ibm_map_pes == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + pdn = pci_get_pdn(pdev); + addr = rtas_config_addr(pdn->busno, pdn->devfn, 0); + buid = pdn->phb->buid; + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, vf_pe_array, + RTAS_DATA_BUF_SIZE); + rc = rtas_call(ibm_map_pes, 5, 1, NULL, addr, + BUID_HI(buid), BUID_LO(buid), + rtas_data_buf, + num_vfs * sizeof(struct pe_map_bar_entry)); + memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + if (rc) + dev_err(&pdev->dev, + "%s: Failed to associate pes PE#%lx, rc=%x\n", + __func__, addr, rc); + + return rc; +} + +static void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num); + dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n", + pci_domain_nr(pdev->bus), + pdev->bus->number, + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), + pdn->pe_num_map[vf_index]); +} + +static int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int i, rc, vf_index; + struct pe_map_bar_entry *vf_pe_array; + struct resource *res; + u64 size; + + vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!vf_pe_array) + return -ENOMEM; + + pdn = pci_get_pdn(pdev); + /* create firmware structure to associate pes */ + for (vf_index = 0; vf_index < num_vfs; vf_index++) { + pdn->pe_num_map[vf_index] = IODA_INVALID_PE; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + if (!res->parent) + continue; + size = pcibios_iov_resource_alignment(pdev, i + + PCI_IOV_RESOURCES); + vf_pe_array[vf_index].bar = + cpu_to_be64(res->start + size * vf_index); + vf_pe_array[vf_index].rid = + cpu_to_be16((pci_iov_virtfn_bus(pdev, vf_index) + << 8) | pci_iov_virtfn_devfn(pdev, + vf_index)); + vf_pe_array[vf_index].pe_num = + cpu_to_be16(IODA_INVALID_PE); + } + } + + rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array); + /* Only zero is success */ + if (!rc) + for (vf_index = 0; vf_index < num_vfs; vf_index++) + pseries_set_pe_num(pdev, vf_index, + vf_pe_array[vf_index].pe_num); + + kfree(vf_pe_array); + return rc; +} + +static int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + struct pci_dn *pdn; + int rc; + const int *max_vfs; + int max_config_vfs; + struct device_node *dn = pci_device_to_OF_node(pdev); + + max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL); + + if (!max_vfs) + return -EINVAL; + + /* First integer stores max config */ + max_config_vfs = of_read_number(&max_vfs[0], 1); + if (max_config_vfs < num_vfs && num_vfs > MAX_VFS_FOR_MAP_PE) { + dev_err(&pdev->dev, + "Num VFs %x > %x Configurable VFs\n", + num_vfs, (num_vfs > MAX_VFS_FOR_MAP_PE) ? + MAX_VFS_FOR_MAP_PE : max_config_vfs); + return -EINVAL; + } + + pdn = pci_get_pdn(pdev); + pdn->pe_num_map = kmalloc_array(num_vfs, + sizeof(*pdn->pe_num_map), + GFP_KERNEL); + if (!pdn->pe_num_map) + return -ENOMEM; + + rc = pseries_associate_pes(pdev, num_vfs); + + /* Anything other than zero is failure */ + if (rc) { + dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc); + kfree(pdn->pe_num_map); + } else { + pci_vf_drivers_autoprobe(pdev, false); + } + + return rc; +} + +static int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) +{ + /* Allocate PCI data */ + add_sriov_vf_pdns(pdev); + return pseries_pci_sriov_enable(pdev, num_vfs); +} + +static int pseries_pcibios_sriov_disable(struct pci_dev *pdev) +{ + struct pci_dn *pdn; + + pdn = pci_get_pdn(pdev); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); + /* Release PCI data */ + remove_sriov_vf_pdns(pdev); + pci_vf_drivers_autoprobe(pdev, true); + return 0; +} +#endif + +static void __init pSeries_request_regions(void) +{ + if (!isa_io_base) + return; + + request_region(0x20,0x20,"pic1"); + request_region(0xa0,0x20,"pic2"); + request_region(0x00,0x20,"dma1"); + request_region(0x40,0x20,"timer"); + request_region(0x80,0x10,"dma page reg"); + request_region(0xc0,0x20,"dma2"); +} + +void __init pSeries_final_fixup(void) +{ + pSeries_request_regions(); + + eeh_show_enabled(); + +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable; + ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable; +#endif +} + +/* + * Assume the winbond 82c105 is the IDE controller on a + * p610/p615/p630. We should probably be more careful in case + * someone tries to plug in a similar adapter. + */ +static void fixup_winbond_82c105(struct pci_dev* dev) +{ + struct resource *r; + unsigned int reg; + + if (!machine_is(pseries)) + return; + + printk("Using INTC for W82c105 IDE controller.\n"); + pci_read_config_dword(dev, 0x40, ®); + /* Enable LEGIRQ to use INTC instead of ISA interrupts */ + pci_write_config_dword(dev, 0x40, reg | (1<<11)); + + pci_dev_for_each_resource(dev, r) { + /* zap the 2nd function of the winbond chip */ + if (dev->bus->number == 0 && dev->devfn == 0x81 && + r->flags & IORESOURCE_IO) + r->flags &= ~IORESOURCE_IO; + if (r->start == 0 && r->end) { + r->flags = 0; + r->end = 0; + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105, + fixup_winbond_82c105); + +static enum pci_bus_speed prop_to_pci_speed(u32 prop) +{ + switch (prop) { + case 0x01: + return PCIE_SPEED_2_5GT; + case 0x02: + return PCIE_SPEED_5_0GT; + case 0x04: + return PCIE_SPEED_8_0GT; + case 0x08: + return PCIE_SPEED_16_0GT; + case 0x10: + return PCIE_SPEED_32_0GT; + default: + pr_debug("Unexpected PCI link speed property value\n"); + return PCI_SPEED_UNKNOWN; + } +} + +int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct device_node *dn, *pdn; + struct pci_bus *bus; + u32 pcie_link_speed_stats[2]; + int rc; + + bus = bridge->bus; + + /* Rely on the pcibios_free_controller_deferred() callback. */ + pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred, + (void *) pci_bus_to_host(bus)); + + dn = pcibios_get_phb_of_node(bus); + if (!dn) + return 0; + + for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) { + rc = of_property_read_u32_array(pdn, + "ibm,pcie-link-speed-stats", + &pcie_link_speed_stats[0], 2); + if (!rc) + break; + } + + of_node_put(pdn); + + if (rc) { + pr_debug("no ibm,pcie-link-speed-stats property\n"); + return 0; + } + + bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]); + bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]); + return 0; +} diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c new file mode 100644 index 0000000000..4ba8245681 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code + * for RPA-compliant PPC64 platform. + * Copyright (C) 2003 Linda Xie + * Copyright (C) 2005 International Business Machines + * + * Updates, 2005, John Rose + * Updates, 2005, Linas Vepstas + */ + +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +struct pci_controller *init_phb_dynamic(struct device_node *dn) +{ + struct pci_controller *phb; + + pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn); + + phb = pcibios_alloc_controller(dn); + if (!phb) + return NULL; + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, dn, 0); + phb->controller_ops = pseries_pci_controller_ops; + + pci_devs_phb_init_dynamic(phb); + + pseries_msi_allocate_domains(phb); + + /* Create EEH devices for the PHB */ + eeh_phb_pe_create(phb); + + if (dn->child) + pseries_eeh_init_edev_recursive(PCI_DN(dn)); + + pcibios_scan_phb(phb); + pcibios_finish_adding_to_bus(phb->bus); + + return phb; +} +EXPORT_SYMBOL_GPL(init_phb_dynamic); + +/* RPA-specific bits for removing PHBs */ +int remove_phb_dynamic(struct pci_controller *phb) +{ + struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); + struct resource *res; + int rc, i; + + pr_debug("PCI: Removing PHB %04x:%02x...\n", + pci_domain_nr(b), b->number); + + /* We cannot to remove a root bus that has children */ + if (!(list_empty(&b->children) && list_empty(&b->devices))) + return -EBUSY; + + /* We -know- there aren't any child devices anymore at this stage + * and thus, we can safely unmap the IO space as it's not in use + */ + res = &phb->io_resource; + if (res->flags & IORESOURCE_IO) { + rc = pcibios_unmap_io_space(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __func__, b->name); + return 1; + } + } + + pseries_msi_free_domains(phb); + + /* Keep a reference so phb isn't freed yet */ + get_device(&host_bridge->dev); + + /* Remove the PCI bus and unregister the bridge device from sysfs */ + phb->bus = NULL; + pci_remove_bus(b); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); + + /* Now release the IO resource */ + if (res->flags & IORESOURCE_IO) + release_resource(res); + + /* Release memory resources */ + for (i = 0; i < 3; ++i) { + res = &phb->mem_resources[i]; + if (!(res->flags & IORESOURCE_MEM)) + continue; + release_resource(res); + } + + /* + * The pci_controller data structure is freed by + * the pcibios_free_controller_deferred() callback; + * see pseries_root_bridge_prepare(). + */ + put_device(&host_bridge->dev); + + return 0; +} +EXPORT_SYMBOL_GPL(remove_phb_dynamic); diff --git a/arch/powerpc/platforms/pseries/plpks-secvar.c b/arch/powerpc/platforms/pseries/plpks-secvar.c new file mode 100644 index 0000000000..257fd1f8bc --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks-secvar.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0-only + +// Secure variable implementation using the PowerVM LPAR Platform KeyStore (PLPKS) +// +// Copyright 2022, 2023 IBM Corporation +// Authors: Russell Currey +// Andrew Donnellan +// Nayna Jain + +#define pr_fmt(fmt) "secvar: "fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Config attributes for sysfs +#define PLPKS_CONFIG_ATTR(name, fmt, func) \ + static ssize_t name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ + { \ + return sysfs_emit(buf, fmt, func()); \ + } \ + static struct kobj_attribute attr_##name = __ATTR_RO(name) + +PLPKS_CONFIG_ATTR(version, "%u\n", plpks_get_version); +PLPKS_CONFIG_ATTR(max_object_size, "%u\n", plpks_get_maxobjectsize); +PLPKS_CONFIG_ATTR(total_size, "%u\n", plpks_get_totalsize); +PLPKS_CONFIG_ATTR(used_space, "%u\n", plpks_get_usedspace); +PLPKS_CONFIG_ATTR(supported_policies, "%08x\n", plpks_get_supportedpolicies); +PLPKS_CONFIG_ATTR(signed_update_algorithms, "%016llx\n", plpks_get_signedupdatealgorithms); + +static const struct attribute *config_attrs[] = { + &attr_version.attr, + &attr_max_object_size.attr, + &attr_total_size.attr, + &attr_used_space.attr, + &attr_supported_policies.attr, + &attr_signed_update_algorithms.attr, + NULL, +}; + +static u32 get_policy(const char *name) +{ + if ((strcmp(name, "db") == 0) || + (strcmp(name, "dbx") == 0) || + (strcmp(name, "grubdb") == 0) || + (strcmp(name, "grubdbx") == 0) || + (strcmp(name, "sbat") == 0)) + return (PLPKS_WORLDREADABLE | PLPKS_SIGNEDUPDATE); + else + return PLPKS_SIGNEDUPDATE; +} + +static const char * const plpks_var_names[] = { + "PK", + "KEK", + "db", + "dbx", + "grubdb", + "grubdbx", + "sbat", + "moduledb", + "trustedcadb", + NULL, +}; + +static int plpks_get_variable(const char *key, u64 key_len, u8 *data, + u64 *data_size) +{ + struct plpks_var var = {0}; + int rc = 0; + + // We subtract 1 from key_len because we don't need to include the + // null terminator at the end of the string + var.name = kcalloc(key_len - 1, sizeof(wchar_t), GFP_KERNEL); + if (!var.name) + return -ENOMEM; + rc = utf8s_to_utf16s(key, key_len - 1, UTF16_LITTLE_ENDIAN, (wchar_t *)var.name, + key_len - 1); + if (rc < 0) + goto err; + var.namelen = rc * 2; + + var.os = PLPKS_VAR_LINUX; + if (data) { + var.data = data; + var.datalen = *data_size; + } + rc = plpks_read_os_var(&var); + + if (rc) + goto err; + + *data_size = var.datalen; + +err: + kfree(var.name); + if (rc && rc != -ENOENT) { + pr_err("Failed to read variable '%s': %d\n", key, rc); + // Return -EIO since userspace probably doesn't care about the + // specific error + rc = -EIO; + } + return rc; +} + +static int plpks_set_variable(const char *key, u64 key_len, u8 *data, + u64 data_size) +{ + struct plpks_var var = {0}; + int rc = 0; + u64 flags; + + // Secure variables need to be prefixed with 8 bytes of flags. + // We only want to perform the write if we have at least one byte of data. + if (data_size <= sizeof(flags)) + return -EINVAL; + + // We subtract 1 from key_len because we don't need to include the + // null terminator at the end of the string + var.name = kcalloc(key_len - 1, sizeof(wchar_t), GFP_KERNEL); + if (!var.name) + return -ENOMEM; + rc = utf8s_to_utf16s(key, key_len - 1, UTF16_LITTLE_ENDIAN, (wchar_t *)var.name, + key_len - 1); + if (rc < 0) + goto err; + var.namelen = rc * 2; + + // Flags are contained in the first 8 bytes of the buffer, and are always big-endian + flags = be64_to_cpup((__be64 *)data); + + var.datalen = data_size - sizeof(flags); + var.data = data + sizeof(flags); + var.os = PLPKS_VAR_LINUX; + var.policy = get_policy(key); + + // Unlike in the read case, the plpks error code can be useful to + // userspace on write, so we return it rather than just -EIO + rc = plpks_signed_update_var(&var, flags); + +err: + kfree(var.name); + return rc; +} + +// PLPKS dynamic secure boot doesn't give us a format string in the same way OPAL does. +// Instead, report the format using the SB_VERSION variable in the keystore. +// The string is made up by us, and takes the form "ibm,plpks-sb-v" (or "ibm,plpks-sb-unknown" +// if the SB_VERSION variable doesn't exist). Hypervisor defines the SB_VERSION variable as a +// "1 byte unsigned integer value". +static ssize_t plpks_secvar_format(char *buf, size_t bufsize) +{ + struct plpks_var var = {0}; + ssize_t ret; + u8 version; + + var.component = NULL; + // Only the signed variables have null bytes in their names, this one doesn't + var.name = "SB_VERSION"; + var.namelen = strlen(var.name); + var.datalen = 1; + var.data = &version; + + // Unlike the other vars, SB_VERSION is owned by firmware instead of the OS + ret = plpks_read_fw_var(&var); + if (ret) { + if (ret == -ENOENT) { + ret = snprintf(buf, bufsize, "ibm,plpks-sb-unknown"); + } else { + pr_err("Error %ld reading SB_VERSION from firmware\n", ret); + ret = -EIO; + } + goto err; + } + + ret = snprintf(buf, bufsize, "ibm,plpks-sb-v%hhu", version); +err: + return ret; +} + +static int plpks_max_size(u64 *max_size) +{ + // The max object size reported by the hypervisor is accurate for the + // object itself, but we use the first 8 bytes of data on write as the + // signed update flags, so the max size a user can write is larger. + *max_size = (u64)plpks_get_maxobjectsize() + sizeof(u64); + + return 0; +} + + +static const struct secvar_operations plpks_secvar_ops = { + .get = plpks_get_variable, + .set = plpks_set_variable, + .format = plpks_secvar_format, + .max_size = plpks_max_size, + .config_attrs = config_attrs, + .var_names = plpks_var_names, +}; + +static int plpks_secvar_init(void) +{ + if (!plpks_is_available()) + return -ENODEV; + + return set_secvar_ops(&plpks_secvar_ops); +} +machine_device_initcall(pseries, plpks_secvar_init); diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c new file mode 100644 index 0000000000..2d40304eb6 --- /dev/null +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -0,0 +1,711 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * POWER LPAR Platform KeyStore(PLPKS) + * Copyright (C) 2022 IBM Corporation + * Author: Nayna Jain + * + * Provides access to variables stored in Power LPAR Platform KeyStore(PLPKS). + */ + +#define pr_fmt(fmt) "plpks: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static u8 *ospassword; +static u16 ospasswordlength; + +// Retrieved with H_PKS_GET_CONFIG +static u8 version; +static u16 objoverhead; +static u16 maxpwsize; +static u16 maxobjsize; +static s16 maxobjlabelsize; +static u32 totalsize; +static u32 usedspace; +static u32 supportedpolicies; +static u32 maxlargeobjectsize; +static u64 signedupdatealgorithms; + +struct plpks_auth { + u8 version; + u8 consumer; + __be64 rsvd0; + __be32 rsvd1; + __be16 passwordlength; + u8 password[]; +} __packed __aligned(16); + +struct label_attr { + u8 prefix[8]; + u8 version; + u8 os; + u8 length; + u8 reserved[5]; +}; + +struct label { + struct label_attr attr; + u8 name[PLPKS_MAX_NAME_SIZE]; + size_t size; +}; + +static int pseries_status_to_err(int rc) +{ + int err; + + switch (rc) { + case H_SUCCESS: + err = 0; + break; + case H_FUNCTION: + err = -ENXIO; + break; + case H_PARAMETER: + case H_P2: + case H_P3: + case H_P4: + case H_P5: + case H_P6: + err = -EINVAL; + break; + case H_NOT_FOUND: + err = -ENOENT; + break; + case H_BUSY: + case H_LONG_BUSY_ORDER_1_MSEC: + case H_LONG_BUSY_ORDER_10_MSEC: + case H_LONG_BUSY_ORDER_100_MSEC: + case H_LONG_BUSY_ORDER_1_SEC: + case H_LONG_BUSY_ORDER_10_SEC: + case H_LONG_BUSY_ORDER_100_SEC: + err = -EBUSY; + break; + case H_AUTHORITY: + err = -EPERM; + break; + case H_NO_MEM: + err = -ENOMEM; + break; + case H_RESOURCE: + err = -EEXIST; + break; + case H_TOO_BIG: + err = -EFBIG; + break; + case H_STATE: + err = -EIO; + break; + case H_R_STATE: + err = -EIO; + break; + case H_IN_USE: + err = -EEXIST; + break; + case H_ABORTED: + err = -EIO; + break; + default: + err = -EINVAL; + } + + pr_debug("Converted hypervisor code %d to Linux %d\n", rc, err); + + return err; +} + +static int plpks_gen_password(void) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + u8 *password, consumer = PLPKS_OS_OWNER; + int rc; + + // If we booted from kexec, we could be reusing an existing password already + if (ospassword) { + pr_debug("Password of length %u already in use\n", ospasswordlength); + return 0; + } + + // The password must not cross a page boundary, so we align to the next power of 2 + password = kzalloc(roundup_pow_of_two(maxpwsize), GFP_KERNEL); + if (!password) + return -ENOMEM; + + rc = plpar_hcall(H_PKS_GEN_PASSWORD, retbuf, consumer, 0, + virt_to_phys(password), maxpwsize); + + if (!rc) { + ospasswordlength = maxpwsize; + ospassword = kzalloc(maxpwsize, GFP_KERNEL); + if (!ospassword) { + kfree(password); + return -ENOMEM; + } + memcpy(ospassword, password, ospasswordlength); + } else { + if (rc == H_IN_USE) { + pr_warn("Password already set - authenticated operations will fail\n"); + rc = 0; + } else { + goto out; + } + } +out: + kfree(password); + + return pseries_status_to_err(rc); +} + +static struct plpks_auth *construct_auth(u8 consumer) +{ + struct plpks_auth *auth; + + if (consumer > PLPKS_OS_OWNER) + return ERR_PTR(-EINVAL); + + // The auth structure must not cross a page boundary and must be + // 16 byte aligned. We align to the next largest power of 2 + auth = kzalloc(roundup_pow_of_two(struct_size(auth, password, maxpwsize)), GFP_KERNEL); + if (!auth) + return ERR_PTR(-ENOMEM); + + auth->version = 1; + auth->consumer = consumer; + + if (consumer == PLPKS_FW_OWNER || consumer == PLPKS_BOOTLOADER_OWNER) + return auth; + + memcpy(auth->password, ospassword, ospasswordlength); + + auth->passwordlength = cpu_to_be16(ospasswordlength); + + return auth; +} + +/* + * Label is combination of label attributes + name. + * Label attributes are used internally by kernel and not exposed to the user. + */ +static struct label *construct_label(char *component, u8 varos, u8 *name, + u16 namelen) +{ + struct label *label; + size_t slen = 0; + + if (!name || namelen > PLPKS_MAX_NAME_SIZE) + return ERR_PTR(-EINVAL); + + // Support NULL component for signed updates + if (component) { + slen = strlen(component); + if (slen > sizeof(label->attr.prefix)) + return ERR_PTR(-EINVAL); + } + + // The label structure must not cross a page boundary, so we align to the next power of 2 + label = kzalloc(roundup_pow_of_two(sizeof(*label)), GFP_KERNEL); + if (!label) + return ERR_PTR(-ENOMEM); + + if (component) + memcpy(&label->attr.prefix, component, slen); + + label->attr.version = PLPKS_LABEL_VERSION; + label->attr.os = varos; + label->attr.length = PLPKS_MAX_LABEL_ATTR_SIZE; + memcpy(&label->name, name, namelen); + + label->size = sizeof(struct label_attr) + namelen; + + return label; +} + +static int _plpks_get_config(void) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct config { + u8 version; + u8 flags; + __be16 rsvd0; + __be16 objoverhead; + __be16 maxpwsize; + __be16 maxobjlabelsize; + __be16 maxobjsize; + __be32 totalsize; + __be32 usedspace; + __be32 supportedpolicies; + __be32 maxlargeobjectsize; + __be64 signedupdatealgorithms; + u8 rsvd1[476]; + } __packed * config; + size_t size; + int rc = 0; + + size = sizeof(*config); + + // Config struct must not cross a page boundary. So long as the struct + // size is a power of 2, this should be fine as alignment is guaranteed + config = kzalloc(size, GFP_KERNEL); + if (!config) { + rc = -ENOMEM; + goto err; + } + + rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(config), size); + + if (rc != H_SUCCESS) { + rc = pseries_status_to_err(rc); + goto err; + } + + version = config->version; + objoverhead = be16_to_cpu(config->objoverhead); + maxpwsize = be16_to_cpu(config->maxpwsize); + maxobjsize = be16_to_cpu(config->maxobjsize); + maxobjlabelsize = be16_to_cpu(config->maxobjlabelsize); + totalsize = be32_to_cpu(config->totalsize); + usedspace = be32_to_cpu(config->usedspace); + supportedpolicies = be32_to_cpu(config->supportedpolicies); + maxlargeobjectsize = be32_to_cpu(config->maxlargeobjectsize); + signedupdatealgorithms = be64_to_cpu(config->signedupdatealgorithms); + + // Validate that the numbers we get back match the requirements of the spec + if (maxpwsize < 32) { + pr_err("Invalid Max Password Size received from hypervisor (%d < 32)\n", maxpwsize); + rc = -EIO; + goto err; + } + + if (maxobjlabelsize < 255) { + pr_err("Invalid Max Object Label Size received from hypervisor (%d < 255)\n", + maxobjlabelsize); + rc = -EIO; + goto err; + } + + if (totalsize < 4096) { + pr_err("Invalid Total Size received from hypervisor (%d < 4096)\n", totalsize); + rc = -EIO; + goto err; + } + + if (version >= 3 && maxlargeobjectsize >= 65536 && maxobjsize != 0xFFFF) { + pr_err("Invalid Max Object Size (0x%x != 0xFFFF)\n", maxobjsize); + rc = -EIO; + goto err; + } + +err: + kfree(config); + return rc; +} + +u8 plpks_get_version(void) +{ + return version; +} + +u16 plpks_get_objoverhead(void) +{ + return objoverhead; +} + +u16 plpks_get_maxpwsize(void) +{ + return maxpwsize; +} + +u16 plpks_get_maxobjectsize(void) +{ + return maxobjsize; +} + +u16 plpks_get_maxobjectlabelsize(void) +{ + return maxobjlabelsize; +} + +u32 plpks_get_totalsize(void) +{ + return totalsize; +} + +u32 plpks_get_usedspace(void) +{ + // Unlike other config values, usedspace regularly changes as objects + // are updated, so we need to refresh. + int rc = _plpks_get_config(); + if (rc) { + pr_err("Couldn't get config, rc: %d\n", rc); + return 0; + } + return usedspace; +} + +u32 plpks_get_supportedpolicies(void) +{ + return supportedpolicies; +} + +u32 plpks_get_maxlargeobjectsize(void) +{ + return maxlargeobjectsize; +} + +u64 plpks_get_signedupdatealgorithms(void) +{ + return signedupdatealgorithms; +} + +u16 plpks_get_passwordlen(void) +{ + return ospasswordlength; +} + +bool plpks_is_available(void) +{ + int rc; + + if (!firmware_has_feature(FW_FEATURE_PLPKS)) + return false; + + rc = _plpks_get_config(); + if (rc) + return false; + + return true; +} + +static int plpks_confirm_object_flushed(struct label *label, + struct plpks_auth *auth) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + bool timed_out = true; + u64 timeout = 0; + u8 status; + int rc; + + do { + rc = plpar_hcall(H_PKS_CONFIRM_OBJECT_FLUSHED, retbuf, + virt_to_phys(auth), virt_to_phys(label), + label->size); + + status = retbuf[0]; + if (rc) { + timed_out = false; + if (rc == H_NOT_FOUND && status == 1) + rc = 0; + break; + } + + if (!rc && status == 1) { + timed_out = false; + break; + } + + usleep_range(PLPKS_FLUSH_SLEEP, + PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE); + timeout = timeout + PLPKS_FLUSH_SLEEP; + } while (timeout < PLPKS_MAX_TIMEOUT); + + if (timed_out) + return -ETIMEDOUT; + + return pseries_status_to_err(rc); +} + +int plpks_signed_update_var(struct plpks_var *var, u64 flags) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + int rc; + struct label *label; + struct plpks_auth *auth; + u64 continuetoken = 0; + u64 timeout = 0; + + if (!var->data || var->datalen <= 0 || var->namelen > PLPKS_MAX_NAME_SIZE) + return -EINVAL; + + if (!(var->policy & PLPKS_SIGNEDUPDATE)) + return -EINVAL; + + // Signed updates need the component to be NULL. + if (var->component) + return -EINVAL; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var->component, var->os, var->name, var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + do { + rc = plpar_hcall9(H_PKS_SIGNED_UPDATE, retbuf, + virt_to_phys(auth), virt_to_phys(label), + label->size, var->policy, flags, + virt_to_phys(var->data), var->datalen, + continuetoken); + + continuetoken = retbuf[0]; + if (pseries_status_to_err(rc) == -EBUSY) { + int delay_ms = get_longbusy_msecs(rc); + mdelay(delay_ms); + timeout += delay_ms; + } + rc = pseries_status_to_err(rc); + } while (rc == -EBUSY && timeout < PLPKS_MAX_TIMEOUT); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + kfree(label); +out: + kfree(auth); + + return rc; +} + +int plpks_write_var(struct plpks_var var) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + int rc; + + if (!var.component || !var.data || var.datalen <= 0 || + var.namelen > PLPKS_MAX_NAME_SIZE || var.datalen > PLPKS_MAX_DATA_SIZE) + return -EINVAL; + + if (var.policy & PLPKS_SIGNEDUPDATE) + return -EINVAL; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(var.component, var.os, var.name, var.namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + rc = plpar_hcall(H_PKS_WRITE_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, var.policy, + virt_to_phys(var.data), var.datalen); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + rc = pseries_status_to_err(rc); + kfree(label); +out: + kfree(auth); + + return rc; +} + +int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label; + int rc; + + if (vname.namelen > PLPKS_MAX_NAME_SIZE) + return -EINVAL; + + auth = construct_auth(PLPKS_OS_OWNER); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + label = construct_label(component, varos, vname.name, vname.namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out; + } + + rc = plpar_hcall(H_PKS_REMOVE_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size); + + if (!rc) + rc = plpks_confirm_object_flushed(label, auth); + + rc = pseries_status_to_err(rc); + kfree(label); +out: + kfree(auth); + + return rc; +} + +static int plpks_read_var(u8 consumer, struct plpks_var *var) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 }; + struct plpks_auth *auth; + struct label *label = NULL; + u8 *output; + int rc; + + if (var->namelen > PLPKS_MAX_NAME_SIZE) + return -EINVAL; + + auth = construct_auth(consumer); + if (IS_ERR(auth)) + return PTR_ERR(auth); + + if (consumer == PLPKS_OS_OWNER) { + label = construct_label(var->component, var->os, var->name, + var->namelen); + if (IS_ERR(label)) { + rc = PTR_ERR(label); + goto out_free_auth; + } + } + + output = kzalloc(maxobjsize, GFP_KERNEL); + if (!output) { + rc = -ENOMEM; + goto out_free_label; + } + + if (consumer == PLPKS_OS_OWNER) + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(label), label->size, virt_to_phys(output), + maxobjsize); + else + rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth), + virt_to_phys(var->name), var->namelen, virt_to_phys(output), + maxobjsize); + + + if (rc != H_SUCCESS) { + rc = pseries_status_to_err(rc); + goto out_free_output; + } + + if (!var->data || var->datalen > retbuf[0]) + var->datalen = retbuf[0]; + + var->policy = retbuf[1]; + + if (var->data) + memcpy(var->data, output, var->datalen); + + rc = 0; + +out_free_output: + kfree(output); +out_free_label: + kfree(label); +out_free_auth: + kfree(auth); + + return rc; +} + +int plpks_read_os_var(struct plpks_var *var) +{ + return plpks_read_var(PLPKS_OS_OWNER, var); +} + +int plpks_read_fw_var(struct plpks_var *var) +{ + return plpks_read_var(PLPKS_FW_OWNER, var); +} + +int plpks_read_bootloader_var(struct plpks_var *var) +{ + return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var); +} + +int plpks_populate_fdt(void *fdt) +{ + int chosen_offset = fdt_path_offset(fdt, "/chosen"); + + if (chosen_offset < 0) { + pr_err("Can't find chosen node: %s\n", + fdt_strerror(chosen_offset)); + return chosen_offset; + } + + return fdt_setprop(fdt, chosen_offset, "ibm,plpks-pw", ospassword, ospasswordlength); +} + +// Once a password is registered with the hypervisor it cannot be cleared without +// rebooting the LPAR, so to keep using the PLPKS across kexec boots we need to +// recover the previous password from the FDT. +// +// There are a few challenges here. We don't want the password to be visible to +// users, so we need to clear it from the FDT. This has to be done in early boot. +// Clearing it from the FDT would make the FDT's checksum invalid, so we have to +// manually cause the checksum to be recalculated. +void __init plpks_early_init_devtree(void) +{ + void *fdt = initial_boot_params; + int chosen_node = fdt_path_offset(fdt, "/chosen"); + const u8 *password; + int len; + + if (chosen_node < 0) + return; + + password = fdt_getprop(fdt, chosen_node, "ibm,plpks-pw", &len); + if (len <= 0) { + pr_debug("Couldn't find ibm,plpks-pw node.\n"); + return; + } + + ospassword = memblock_alloc_raw(len, SMP_CACHE_BYTES); + if (!ospassword) { + pr_err("Error allocating memory for password.\n"); + goto out; + } + + memcpy(ospassword, password, len); + ospasswordlength = (u16)len; + +out: + fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw"); + // Since we've cleared the password, we must update the FDT checksum + early_init_dt_verify(fdt); +} + +static __init int pseries_plpks_init(void) +{ + int rc; + + if (!firmware_has_feature(FW_FEATURE_PLPKS)) + return -ENODEV; + + rc = _plpks_get_config(); + + if (rc) { + pr_err("POWER LPAR Platform KeyStore is not supported or enabled\n"); + return rc; + } + + rc = plpks_gen_password(); + if (rc) + pr_err("Failed setting POWER LPAR Platform KeyStore Password\n"); + else + pr_info("POWER LPAR Platform KeyStore initialized successfully\n"); + + return rc; +} +machine_arch_initcall(pseries, pseries_plpks_init); diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c new file mode 100644 index 0000000000..3c290b9ed0 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Handles hot and cold plug of persistent memory regions on pseries. + */ + +#define pr_fmt(fmt) "pseries-pmem: " fmt + +#include +#include +#include +#include /* for idle_task_exit */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +static struct device_node *pmem_node; + +static ssize_t pmem_drc_add_node(u32 drc_index) +{ + struct device_node *dn; + int rc; + + pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index); + + rc = dlpar_acquire_drc(drc_index); + if (rc) { + pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n", + rc, drc_index); + return -EINVAL; + } + + dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node); + if (!dn) { + pr_err("configure-connector failed for drc %x\n", drc_index); + dlpar_release_drc(drc_index); + return -EINVAL; + } + + /* NB: The of reconfig notifier creates platform device from the node */ + rc = dlpar_attach_node(dn, pmem_node); + if (rc) { + pr_err("Failed to attach node %pOF, rc: %d, drc index: %x\n", + dn, rc, drc_index); + + if (dlpar_release_drc(drc_index)) + dlpar_free_cc_nodes(dn); + + return rc; + } + + pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index); + + return 0; +} + +static ssize_t pmem_drc_remove_node(u32 drc_index) +{ + struct device_node *dn; + uint32_t index; + int rc; + + for_each_child_of_node(pmem_node, dn) { + if (of_property_read_u32(dn, "ibm,my-drc-index", &index)) + continue; + if (index == drc_index) + break; + } + + if (!dn) { + pr_err("Attempting to remove unused DRC index %x\n", drc_index); + return -ENODEV; + } + + pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index); + + /* * NB: tears down the ibm,pmemory device as a side-effect */ + rc = dlpar_detach_node(dn); + if (rc) + return rc; + + rc = dlpar_release_drc(drc_index); + if (rc) { + pr_err("Failed to release drc (%x) for CPU %pOFn, rc: %d\n", + drc_index, dn, rc); + dlpar_attach_node(dn, pmem_node); + return rc; + } + + pr_info("Successfully removed PMEM with drc index: %x\n", drc_index); + + return 0; +} + +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) +{ + u32 drc_index; + int rc; + + /* slim chance, but we might get a hotplug event while booting */ + if (!pmem_node) + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); + if (!pmem_node) { + pr_err("Hotplug event for a pmem device, but none exists\n"); + return -ENODEV; + } + + if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) { + pr_err("Unsupported hotplug event type %d\n", + hp_elog->id_type); + return -EINVAL; + } + + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + if (hp_elog->action == PSERIES_HP_ELOG_ACTION_ADD) { + rc = pmem_drc_add_node(drc_index); + } else if (hp_elog->action == PSERIES_HP_ELOG_ACTION_REMOVE) { + rc = pmem_drc_remove_node(drc_index); + } else { + pr_err("Unsupported hotplug action (%d)\n", hp_elog->action); + rc = -EINVAL; + } + + unlock_device_hotplug(); + return rc; +} + +static const struct of_device_id drc_pmem_match[] = { + { .type = "ibm,persistent-memory", }, + {} +}; + +static int pseries_pmem_init(void) +{ + /* + * Only supported on POWER8 and above. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return 0; + + pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory"); + if (!pmem_node) + return 0; + + /* + * The generic OF bus probe/populate handles creating platform devices + * from the child (ibm,pmemory) nodes. The generic code registers an of + * reconfig notifier to handle the hot-add/remove cases too. + */ + of_platform_bus_probe(pmem_node, drc_pmem_match, NULL); + + return 0; +} +machine_arch_initcall(pseries, pseries_pmem_init); diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c new file mode 100644 index 0000000000..3676cb2977 --- /dev/null +++ b/arch/powerpc/platforms/pseries/power.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Interface for power-management for ppc64 compliant platform + * + * Manish Ahuja + * + * Feb 2007 + * + * Copyright (C) 2007 IBM Corporation. + */ + +#include +#include +#include +#include +#include + +#include "pseries.h" + +unsigned long rtas_poweron_auto; /* default and normal state is 0 */ + +static ssize_t auto_poweron_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", rtas_poweron_auto); +} + +static ssize_t auto_poweron_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int ret; + unsigned long ups_restart; + ret = sscanf(buf, "%lu", &ups_restart); + + if ((ret == 1) && ((ups_restart == 1) || (ups_restart == 0))){ + rtas_poweron_auto = ups_restart; + return n; + } + return -EINVAL; +} + +static struct kobj_attribute auto_poweron_attr = + __ATTR(auto_poweron, 0644, auto_poweron_show, auto_poweron_store); + +#ifndef CONFIG_PM +struct kobject *power_kobj; + +static struct attribute *g[] = { + &auto_poweron_attr.attr, + NULL, +}; + +static const struct attribute_group attr_group = { + .attrs = g, +}; + +static int __init pm_init(void) +{ + power_kobj = kobject_create_and_add("power", NULL); + if (!power_kobj) + return -ENOMEM; + return sysfs_create_group(power_kobj, &attr_group); +} +machine_core_initcall(pseries, pm_init); +#else +static int __init apo_pm_init(void) +{ + return (sysfs_create_file(power_kobj, &auto_poweron_attr.attr)); +} +machine_device_initcall(pseries, apo_pm_init); +#endif diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h new file mode 100644 index 0000000000..8376f03f93 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2006 IBM Corporation. + */ + +#ifndef _PSERIES_PSERIES_H +#define _PSERIES_PSERIES_H + +#include +#include + +struct device_node; + +void __init request_event_sources_irqs(struct device_node *np, + irq_handler_t handler, const char *name); + +#include + +struct pt_regs; + +extern int pSeries_system_reset_exception(struct pt_regs *regs); +extern int pSeries_machine_check_exception(struct pt_regs *regs); +extern long pseries_machine_check_realmode(struct pt_regs *regs); +void pSeries_machine_check_log_err(void); + +#ifdef CONFIG_SMP +extern void smp_init_pseries(void); + +/* Get state of physical CPU from query_cpu_stopped */ +int smp_query_cpu_stopped(unsigned int pcpu); +#define QCSS_STOPPED 0 +#define QCSS_STOPPING 1 +#define QCSS_NOT_STOPPED 2 +#define QCSS_HARDWARE_ERROR -1 +#define QCSS_HARDWARE_BUSY -2 +#else +static inline void smp_init_pseries(void) { } +#endif + +extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); +void pseries_machine_kexec(struct kimage *image); + +extern void pSeries_final_fixup(void); + +/* Poweron flag used for enabling auto ups restart */ +extern unsigned long rtas_poweron_auto; + +/* Dynamic logical Partitioning/Mobility */ +extern void dlpar_free_cc_nodes(struct device_node *); +extern void dlpar_free_cc_property(struct property *); +extern struct device_node *dlpar_configure_connector(__be32, + struct device_node *); +extern int dlpar_attach_node(struct device_node *, struct device_node *); +extern int dlpar_detach_node(struct device_node *); +extern int dlpar_acquire_drc(u32 drc_index); +extern int dlpar_release_drc(u32 drc_index); +extern int dlpar_unisolate_drc(u32 drc_index); + +void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog); +int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog); + +#ifdef CONFIG_MEMORY_HOTPLUG +int dlpar_memory(struct pseries_hp_errorlog *hp_elog); +int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog); +#else +static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +#endif + +#ifdef CONFIG_HOTPLUG_CPU +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog); +void pseries_cpu_hotplug_init(void); +#else +static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +static inline void pseries_cpu_hotplug_init(void) { } +#endif + +/* PCI root bridge prepare function override for pseries */ +struct pci_host_bridge; +int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); + +extern struct pci_controller_ops pseries_pci_controller_ops; +int pseries_msi_allocate_domains(struct pci_controller *phb); +void pseries_msi_free_domains(struct pci_controller *phb); + +extern int CMO_PrPSP; +extern int CMO_SecPSP; +extern unsigned long CMO_PageSize; + +static inline int cmo_get_primary_psp(void) +{ + return CMO_PrPSP; +} + +static inline int cmo_get_secondary_psp(void) +{ + return CMO_SecPSP; +} + +static inline unsigned long cmo_get_page_size(void) +{ + return CMO_PageSize; +} + +int dlpar_workqueue_init(void); + +extern u32 pseries_security_flavor; +void pseries_setup_security_mitigations(void); + +#ifdef CONFIG_PPC_64S_HASH_MMU +void pseries_lpar_read_hblkrm_characteristics(void); +#else +static inline void pseries_lpar_read_hblkrm_characteristics(void) { } +#endif + +void pseries_rng_init(void); +#ifdef CONFIG_SPAPR_TCE_IOMMU +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, + struct pci_dev *pdev); +#endif + +#endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c new file mode 100644 index 0000000000..2c661b7982 --- /dev/null +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * POWER platform energy management driver + * Copyright (C) 2010 IBM Corporation + * + * This pseries platform device driver provides access to + * platform energy management capabilities. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define MODULE_VERS "1.0" +#define MODULE_NAME "pseries_energy" + +/* Driver flags */ + +static int sysfs_entries; + +/* Helper routines */ + +/* Helper Routines to convert between drc_index to cpu numbers */ + +static u32 cpu_to_drc_index(int cpu) +{ + struct device_node *dn = NULL; + struct property *info; + int thread_index; + int rc = 1; + u32 ret = 0; + + dn = of_find_node_by_path("/cpus"); + if (dn == NULL) + goto err; + + /* Convert logical cpu number to core number */ + thread_index = cpu_core_index_of_thread(cpu); + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info) { + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + else + value++; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (thread_index < drc.last_drc_index) + break; + } + + ret = drc.drc_index_start + (thread_index * drc.sequential_inc); + } else { + u32 nr_drc_indexes, thread_drc_index; + + /* + * The first element of ibm,drc-indexes array is the + * number of drc_indexes returned in the list. Hence + * thread_index+1 will get the drc_index corresponding + * to core number thread_index. + */ + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", + 0, &nr_drc_indexes); + if (rc) + goto err_of_node_put; + + WARN_ON_ONCE(thread_index > nr_drc_indexes); + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", + thread_index + 1, + &thread_drc_index); + if (rc) + goto err_of_node_put; + + ret = thread_drc_index; + } + + rc = 0; + +err_of_node_put: + of_node_put(dn); +err: + if (rc) + printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu); + return ret; +} + +static int drc_index_to_cpu(u32 drc_index) +{ + struct device_node *dn = NULL; + struct property *info; + const int *indexes; + int thread_index = 0, cpu = 0; + int rc = 1; + + dn = of_find_node_by_path("/cpus"); + if (dn == NULL) + goto err; + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info) { + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + else + value++; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (drc_index > drc.last_drc_index) { + cpu += drc.num_sequential_elems; + continue; + } + cpu += ((drc_index - drc.drc_index_start) / + drc.sequential_inc); + + thread_index = cpu_first_thread_of_core(cpu); + rc = 0; + break; + } + } else { + unsigned long int i; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + /* + * First element in the array is the number of drc_indexes + * returned. Search through the list to find the matching + * drc_index and get the core number + */ + for (i = 0; i < indexes[0]; i++) { + if (indexes[i + 1] == drc_index) + break; + } + /* Convert core number to logical cpu number */ + thread_index = cpu_first_thread_of_core(i); + rc = 0; + } + +err_of_node_put: + of_node_put(dn); +err: + if (rc) + printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index); + return thread_index; +} + +/* + * pseries hypervisor call H_BEST_ENERGY provides hints to OS on + * preferred logical cpus to activate or deactivate for optimized + * energy consumption. + */ + +#define FLAGS_MODE1 0x004E200000080E01UL +#define FLAGS_MODE2 0x004E200000080401UL +#define FLAGS_ACTIVATE 0x100 + +static ssize_t get_best_energy_list(char *page, int activate) +{ + int rc, cnt, i, cpu; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long flags = 0; + u32 *buf_page; + char *s = page; + + buf_page = (u32 *) get_zeroed_page(GFP_KERNEL); + if (!buf_page) + return -ENOMEM; + + flags = FLAGS_MODE1; + if (activate) + flags |= FLAGS_ACTIVATE; + + rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(buf_page), + 0, 0, 0, 0, 0, 0); + if (rc != H_SUCCESS) { + free_page((unsigned long) buf_page); + return -EINVAL; + } + + cnt = retbuf[0]; + for (i = 0; i < cnt; i++) { + cpu = drc_index_to_cpu(buf_page[2*i+1]); + if ((cpu_online(cpu) && !activate) || + (!cpu_online(cpu) && activate)) + s += sprintf(s, "%d,", cpu); + } + if (s > page) { /* Something to show */ + s--; /* Suppress last comma */ + s += sprintf(s, "\n"); + } + + free_page((unsigned long) buf_page); + return s-page; +} + +static ssize_t get_best_energy_data(struct device *dev, + char *page, int activate) +{ + int rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + unsigned long flags = 0; + + flags = FLAGS_MODE2; + if (activate) + flags |= FLAGS_ACTIVATE; + + rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, + cpu_to_drc_index(dev->id), + 0, 0, 0, 0, 0, 0, 0); + + if (rc != H_SUCCESS) + return -EINVAL; + + return sprintf(page, "%lu\n", retbuf[1] >> 32); +} + +/* Wrapper functions */ + +static ssize_t cpu_activate_hint_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_list(page, 1); +} + +static ssize_t cpu_deactivate_hint_list_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_list(page, 0); +} + +static ssize_t percpu_activate_hint_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_data(dev, page, 1); +} + +static ssize_t percpu_deactivate_hint_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + return get_best_energy_data(dev, page, 0); +} + +/* + * Create sysfs interface: + * /sys/devices/system/cpu/pseries_activate_hint_list + * /sys/devices/system/cpu/pseries_deactivate_hint_list + * Comma separated list of cpus to activate or deactivate + * /sys/devices/system/cpu/cpuN/pseries_activate_hint + * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint + * Per-cpu value of the hint + */ + +static struct device_attribute attr_cpu_activate_hint_list = + __ATTR(pseries_activate_hint_list, 0444, + cpu_activate_hint_list_show, NULL); + +static struct device_attribute attr_cpu_deactivate_hint_list = + __ATTR(pseries_deactivate_hint_list, 0444, + cpu_deactivate_hint_list_show, NULL); + +static struct device_attribute attr_percpu_activate_hint = + __ATTR(pseries_activate_hint, 0444, + percpu_activate_hint_show, NULL); + +static struct device_attribute attr_percpu_deactivate_hint = + __ATTR(pseries_deactivate_hint, 0444, + percpu_deactivate_hint_show, NULL); + +static int __init pseries_energy_init(void) +{ + int cpu, err; + struct device *cpu_dev, *dev_root; + + if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY)) + return 0; /* H_BEST_ENERGY hcall not supported */ + + /* Create the sysfs files */ + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + err = device_create_file(dev_root, &attr_cpu_activate_hint_list); + if (!err) + err = device_create_file(dev_root, &attr_cpu_deactivate_hint_list); + put_device(dev_root); + if (err) + return err; + } + + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + err = device_create_file(cpu_dev, + &attr_percpu_activate_hint); + if (err) + break; + err = device_create_file(cpu_dev, + &attr_percpu_deactivate_hint); + if (err) + break; + } + + if (err) + return err; + + sysfs_entries = 1; /* Removed entries on cleanup */ + return 0; + +} + +static void __exit pseries_energy_cleanup(void) +{ + int cpu; + struct device *cpu_dev, *dev_root; + + if (!sysfs_entries) + return; + + /* Remove the sysfs files */ + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + device_remove_file(dev_root, &attr_cpu_activate_hint_list); + device_remove_file(dev_root, &attr_cpu_deactivate_hint_list); + put_device(dev_root); + } + + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + sysfs_remove_file(&cpu_dev->kobj, + &attr_percpu_activate_hint.attr); + sysfs_remove_file(&cpu_dev->kobj, + &attr_percpu_deactivate_hint.attr); + } +} + +module_init(pseries_energy_init); +module_exit(pseries_energy_cleanup); +MODULE_DESCRIPTION("Driver for pSeries platform energy management"); +MODULE_AUTHOR("Vaidyanathan Srinivasan"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c new file mode 100644 index 0000000000..adafd593d9 --- /dev/null +++ b/arch/powerpc/platforms/pseries/ras.c @@ -0,0 +1,882 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2001 Dave Engebretsen IBM Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pseries.h" + +static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; +static DEFINE_SPINLOCK(ras_log_buf_lock); + +static int ras_check_exception_token; + +#define EPOW_SENSOR_TOKEN 9 +#define EPOW_SENSOR_INDEX 0 + +/* EPOW events counter variable */ +static int num_epow_events; + +static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); +static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); +static irqreturn_t ras_error_interrupt(int irq, void *dev_id); + +/* RTAS pseries MCE errorlog section. */ +struct pseries_mc_errorlog { + __be32 fru_id; + __be32 proc_id; + u8 error_type; + /* + * sub_err_type (1 byte). Bit fields depends on error_type + * + * MSB0 + * | + * V + * 01234567 + * XXXXXXXX + * + * For error_type == MC_ERROR_TYPE_UE + * XXXXXXXX + * X 1: Permanent or Transient UE. + * X 1: Effective address provided. + * X 1: Logical address provided. + * XX 2: Reserved. + * XXX 3: Type of UE error. + * + * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB + * XXXXXXXX + * X 1: Effective address provided. + * XXXXX 5: Reserved. + * XX 2: Type of SLB/ERAT/TLB error. + * + * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS + * XXXXXXXX + * X 1: Error causing address provided. + * XXX 3: Type of error. + * XXXX 4: Reserved. + */ + u8 sub_err_type; + u8 reserved_1[6]; + __be64 effective_address; + __be64 logical_address; +} __packed; + +/* RTAS pseries MCE error types */ +#define MC_ERROR_TYPE_UE 0x00 +#define MC_ERROR_TYPE_SLB 0x01 +#define MC_ERROR_TYPE_ERAT 0x02 +#define MC_ERROR_TYPE_UNKNOWN 0x03 +#define MC_ERROR_TYPE_TLB 0x04 +#define MC_ERROR_TYPE_D_CACHE 0x05 +#define MC_ERROR_TYPE_I_CACHE 0x07 +#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 + +/* RTAS pseries MCE error sub types */ +#define MC_ERROR_UE_INDETERMINATE 0 +#define MC_ERROR_UE_IFETCH 1 +#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 +#define MC_ERROR_UE_LOAD_STORE 3 +#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 + +#define UE_EFFECTIVE_ADDR_PROVIDED 0x40 +#define UE_LOGICAL_ADDR_PROVIDED 0x20 +#define MC_EFFECTIVE_ADDR_PROVIDED 0x80 + +#define MC_ERROR_SLB_PARITY 0 +#define MC_ERROR_SLB_MULTIHIT 1 +#define MC_ERROR_SLB_INDETERMINATE 2 + +#define MC_ERROR_ERAT_PARITY 1 +#define MC_ERROR_ERAT_MULTIHIT 2 +#define MC_ERROR_ERAT_INDETERMINATE 3 + +#define MC_ERROR_TLB_PARITY 1 +#define MC_ERROR_TLB_MULTIHIT 2 +#define MC_ERROR_TLB_INDETERMINATE 3 + +#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 +#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 + +static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) +{ + switch (mlog->error_type) { + case MC_ERROR_TYPE_UE: + return (mlog->sub_err_type & 0x07); + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + case MC_ERROR_TYPE_TLB: + return (mlog->sub_err_type & 0x03); + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + return (mlog->sub_err_type & 0x70) >> 4; + default: + return 0; + } +} + +/* + * Enable the hotplug interrupt late because processing them may touch other + * devices or systems (e.g. hugepages) that have not been initialized at the + * subsys stage. + */ +static int __init init_ras_hotplug_IRQ(void) +{ + struct device_node *np; + + /* Hotplug Events */ + np = of_find_node_by_path("/event-sources/hot-plug-events"); + if (np != NULL) { + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, + "RAS_HOTPLUG"); + of_node_put(np); + } + + return 0; +} +machine_late_initcall(pseries, init_ras_hotplug_IRQ); + +/* + * Initialize handlers for the set of interrupts caused by hardware errors + * and power system events. + */ +static int __init init_ras_IRQ(void) +{ + struct device_node *np; + + ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION); + + /* Internal Errors */ + np = of_find_node_by_path("/event-sources/internal-errors"); + if (np != NULL) { + request_event_sources_irqs(np, ras_error_interrupt, + "RAS_ERROR"); + of_node_put(np); + } + + /* EPOW Events */ + np = of_find_node_by_path("/event-sources/epow-events"); + if (np != NULL) { + request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); + of_node_put(np); + } + + return 0; +} +machine_subsys_initcall(pseries, init_ras_IRQ); + +#define EPOW_SHUTDOWN_NORMAL 1 +#define EPOW_SHUTDOWN_ON_UPS 2 +#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 +#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 + +static void handle_system_shutdown(char event_modifier) +{ + switch (event_modifier) { + case EPOW_SHUTDOWN_NORMAL: + pr_emerg("Power off requested\n"); + orderly_poweroff(true); + break; + + case EPOW_SHUTDOWN_ON_UPS: + pr_emerg("Loss of system power detected. System is running on" + " UPS/battery. Check RTAS error log for details\n"); + break; + + case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: + pr_emerg("Loss of system critical functions detected. Check" + " RTAS error log for details\n"); + orderly_poweroff(true); + break; + + case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: + pr_emerg("High ambient temperature detected. Check RTAS" + " error log for details\n"); + orderly_poweroff(true); + break; + + default: + pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", + event_modifier); + } +} + +struct epow_errorlog { + unsigned char sensor_value; + unsigned char event_modifier; + unsigned char extended_modifier; + unsigned char reserved; + unsigned char platform_reason; +}; + +#define EPOW_RESET 0 +#define EPOW_WARN_COOLING 1 +#define EPOW_WARN_POWER 2 +#define EPOW_SYSTEM_SHUTDOWN 3 +#define EPOW_SYSTEM_HALT 4 +#define EPOW_MAIN_ENCLOSURE 5 +#define EPOW_POWER_OFF 7 + +static void rtas_parse_epow_errlog(struct rtas_error_log *log) +{ + struct pseries_errorlog *pseries_log; + struct epow_errorlog *epow_log; + char action_code; + char modifier; + + pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); + if (pseries_log == NULL) + return; + + epow_log = (struct epow_errorlog *)pseries_log->data; + action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ + modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ + + switch (action_code) { + case EPOW_RESET: + if (num_epow_events) { + pr_info("Non critical power/cooling issue cleared\n"); + num_epow_events--; + } + break; + + case EPOW_WARN_COOLING: + pr_info("Non-critical cooling issue detected. Check RTAS error" + " log for details\n"); + break; + + case EPOW_WARN_POWER: + pr_info("Non-critical power issue detected. Check RTAS error" + " log for details\n"); + break; + + case EPOW_SYSTEM_SHUTDOWN: + handle_system_shutdown(modifier); + break; + + case EPOW_SYSTEM_HALT: + pr_emerg("Critical power/cooling issue detected. Check RTAS" + " error log for details. Powering off.\n"); + orderly_poweroff(true); + break; + + case EPOW_MAIN_ENCLOSURE: + case EPOW_POWER_OFF: + pr_emerg("System about to lose power. Check RTAS error log " + " for details. Powering off immediately.\n"); + emergency_sync(); + kernel_power_off(); + break; + + default: + pr_err("Unknown power/cooling event (action code = %d)\n", + action_code); + } + + /* Increment epow events counter variable */ + if (action_code != EPOW_RESET) + num_epow_events++; +} + +static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) +{ + struct pseries_errorlog *pseries_log; + struct pseries_hp_errorlog *hp_elog; + + spin_lock(&ras_log_buf_lock); + + rtas_call(ras_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), + RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, + PSERIES_ELOG_SECT_ID_HOTPLUG); + hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; + + /* + * Since PCI hotplug is not currently supported on pseries, put PCI + * hotplug events on the ras_log_buf to be handled by rtas_errd. + */ + if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) + queue_hotplug_event(hp_elog); + else + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* Handle environmental and power warning (EPOW) interrupts. */ +static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) +{ + int state; + int critical; + + rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + + if (state > 3) + critical = 1; /* Time Critical */ + else + critical = 0; + + spin_lock(&ras_log_buf_lock); + + rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* + * Handle hardware error interrupts. + * + * RTAS check-exception is called to collect data on the exception. If + * the error is deemed recoverable, we log a warning and return. + * For nonrecoverable errors, an error is logged and we stop all processing + * as quickly as possible in order to prevent propagation of the failure. + */ +static irqreturn_t ras_error_interrupt(int irq, void *dev_id) +{ + struct rtas_error_log *rtas_elog; + int status; + int fatal; + + spin_lock(&ras_log_buf_lock); + + status = rtas_call(ras_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, + virq_to_hw(irq), + RTAS_INTERNAL_ERROR, 1 /* Time Critical */, + __pa(&ras_log_buf), + rtas_get_error_log_max()); + + rtas_elog = (struct rtas_error_log *)ras_log_buf; + + if (status == 0 && + rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) + fatal = 1; + else + fatal = 0; + + /* format and print the extended information */ + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); + + if (fatal) { + pr_emerg("Fatal hardware error detected. Check RTAS error" + " log for details. Powering off immediately\n"); + emergency_sync(); + kernel_power_off(); + } else { + pr_err("Recoverable hardware error detected\n"); + } + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + +/* + * Some versions of FWNMI place the buffer inside the 4kB page starting at + * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. + */ +#define VALID_FWNMI_BUFFER(A) \ + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) + +static inline struct rtas_error_log *fwnmi_get_errlog(void) +{ + return (struct rtas_error_log *)local_paca->mce_data_buf; +} + +static __be64 *fwnmi_get_savep(struct pt_regs *regs) +{ + unsigned long savep_ra; + + /* Mask top two bits */ + savep_ra = regs->gpr[3] & ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(savep_ra)) { + printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); + return NULL; + } + + return __va(savep_ra); +} + +/* + * Get the error information for errors coming through the + * FWNMI vectors. The pt_regs' r3 will be updated to reflect + * the actual r3 if possible, and a ptr to the error log entry + * will be returned if found. + * + * Use one buffer mce_data_buf per cpu to store RTAS error. + * + * The mce_data_buf does not have any locks or protection around it, + * if a second machine check comes in, or a system reset is done + * before we have logged the error, then we will get corruption in the + * error log. This is preferable over holding off on calling + * ibm,nmi-interlock which would result in us checkstopping if a + * second machine check did come in. + */ +static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) +{ + struct rtas_error_log *h; + __be64 *savep; + + savep = fwnmi_get_savep(regs); + if (!savep) + return NULL; + + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ + + h = (struct rtas_error_log *)&savep[1]; + /* Use the per cpu buffer from paca to store rtas error log */ + memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); + if (!rtas_error_extended(h)) { + memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); + } else { + int len, error_log_length; + + error_log_length = 8 + rtas_error_extended_log_length(h); + len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); + memcpy(local_paca->mce_data_buf, h, len); + } + + return (struct rtas_error_log *)local_paca->mce_data_buf; +} + +/* Call this when done with the data returned by FWNMI_get_errinfo. + * It will release the saved data area for other CPUs in the + * partition to receive FWNMI errors. + */ +static void fwnmi_release_errinfo(void) +{ + struct rtas_args rtas_args; + int ret; + + /* + * On pseries, the machine check stack is limited to under 4GB, so + * args can be on-stack. + */ + rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); + ret = be32_to_cpu(rtas_args.rets[0]); + if (ret != 0) + printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); +} + +int pSeries_system_reset_exception(struct pt_regs *regs) +{ +#ifdef __LITTLE_ENDIAN__ + /* + * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try + * to detect the bad SRR1 pattern here. Flip the NIP back to correct + * endian for reporting purposes. Unfortunately the MSR can't be fixed, + * so clear it. It will be missing MSR_RI so we won't try to recover. + */ + if ((be64_to_cpu(regs->msr) & + (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| + MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); + } +#endif + + if (fwnmi_active) { + __be64 *savep; + + /* + * Firmware (PowerVM and KVM) saves r3 to a save area like + * machine check, which is not exactly what PAPR (2.9) + * suggests but there is no way to detect otherwise, so this + * is the interface now. + * + * System resets do not save any error log or require an + * "ibm,nmi-interlock" rtas call to release. + */ + + savep = fwnmi_get_savep(regs); + if (savep) + regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ + } + + if (smp_handle_nmi_ipi(regs)) + return 1; + + return 0; /* need to perform reset */ +} + +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_ERAT: + flush_erat(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + case MC_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; +#endif + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} + +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) +{ + struct mce_error_info mce_err = { 0 }; + int initiator = rtas_error_initiator(errp); + int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; + u8 error_type, err_sub_type; + + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + + if (initiator == RTAS_INITIATOR_UNKNOWN) + mce_err.initiator = MCE_INITIATOR_UNKNOWN; + else if (initiator == RTAS_INITIATOR_CPU) + mce_err.initiator = MCE_INITIATOR_CPU; + else if (initiator == RTAS_INITIATOR_PCI) + mce_err.initiator = MCE_INITIATOR_PCI; + else if (initiator == RTAS_INITIATOR_ISA) + mce_err.initiator = MCE_INITIATOR_ISA; + else if (initiator == RTAS_INITIATOR_MEMORY) + mce_err.initiator = MCE_INITIATOR_MEMORY; + else if (initiator == RTAS_INITIATOR_POWERMGM) + mce_err.initiator = MCE_INITIATOR_POWERMGM; + else + mce_err.initiator = MCE_INITIATOR_UNKNOWN; + + if (severity == RTAS_SEVERITY_NO_ERROR) + mce_err.severity = MCE_SEV_NO_ERROR; + else if (severity == RTAS_SEVERITY_EVENT) + mce_err.severity = MCE_SEV_WARNING; + else if (severity == RTAS_SEVERITY_WARNING) + mce_err.severity = MCE_SEV_WARNING; + else if (severity == RTAS_SEVERITY_ERROR_SYNC) + mce_err.severity = MCE_SEV_SEVERE; + else if (severity == RTAS_SEVERITY_ERROR) + mce_err.severity = MCE_SEV_SEVERE; + else + mce_err.severity = MCE_SEV_FATAL; + + if (severity <= RTAS_SEVERITY_ERROR_SYNC) + mce_err.sync_error = true; + else + mce_err.sync_error = false; + + mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; + mce_err.error_class = MCE_ECLASS_UNKNOWN; + + switch (error_type) { + case MC_ERROR_TYPE_UE: + mce_err.error_type = MCE_ERROR_TYPE_UE; + mce_common_process_ue(regs, &mce_err); + if (mce_err.ignore_event) + disposition = RTAS_DISP_FULLY_RECOVERED; + switch (err_sub_type) { + case MC_ERROR_UE_IFETCH: + mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH: + mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case MC_ERROR_UE_LOAD_STORE: + mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + break; + case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE: + mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + break; + case MC_ERROR_UE_INDETERMINATE: + default: + mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + + if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { + paddr = be64_to_cpu(mce_log->logical_address); + } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { + unsigned long pfn; + + pfn = addr_to_pfn(regs, eaddr); + if (pfn != ULONG_MAX) + paddr = pfn << PAGE_SHIFT; + } + + break; + case MC_ERROR_TYPE_SLB: + mce_err.error_type = MCE_ERROR_TYPE_SLB; + switch (err_sub_type) { + case MC_ERROR_SLB_PARITY: + mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case MC_ERROR_SLB_MULTIHIT: + mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case MC_ERROR_SLB_INDETERMINATE: + default: + mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_ERAT: + mce_err.error_type = MCE_ERROR_TYPE_ERAT; + switch (err_sub_type) { + case MC_ERROR_ERAT_PARITY: + mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY; + break; + case MC_ERROR_ERAT_MULTIHIT: + mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case MC_ERROR_ERAT_INDETERMINATE: + default: + mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_TLB: + mce_err.error_type = MCE_ERROR_TYPE_TLB; + switch (err_sub_type) { + case MC_ERROR_TLB_PARITY: + mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY; + break; + case MC_ERROR_TLB_MULTIHIT: + mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case MC_ERROR_TLB_INDETERMINATE: + default: + mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_D_CACHE: + mce_err.error_type = MCE_ERROR_TYPE_DCACHE; + break; + case MC_ERROR_TYPE_I_CACHE: + mce_err.error_type = MCE_ERROR_TYPE_ICACHE; + break; + case MC_ERROR_TYPE_CTRL_MEM_ACCESS: + mce_err.error_type = MCE_ERROR_TYPE_RA; + switch (err_sub_type) { + case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: + mce_err.u.ra_error_type = + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + break; + case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: + mce_err.u.ra_error_type = + MCE_RA_ERROR_LOAD_STORE_FOREIGN; + break; + } + if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) + eaddr = be64_to_cpu(mce_log->effective_address); + break; + case MC_ERROR_TYPE_UNKNOWN: + default: + mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; + break; + } +out: + save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} + +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); +out: + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); + return disposition; +} + +/* + * Process MCE rtas errlog event. + */ +void pSeries_machine_check_log_err(void) +{ + struct rtas_error_log *err; + + err = fwnmi_get_errlog(); + log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); +} + +/* + * See if we can recover from a machine check exception. + * This is only called on power4 (or above) and only via + * the Firmware Non-Maskable Interrupts (fwnmi) handler + * which provides the error analysis for us. + * + * Return 1 if corrected (or delivered a signal). + * Return 0 if there is nothing we can do. + */ +static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) +{ + int recovered = 0; + + if (regs_is_unrecoverable(regs)) { + /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); + recovered = 0; + } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { + /* Platform corrected itself */ + recovered = 1; + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } + + if (!recovered && evt->sync_error) { + /* + * Try to kill processes if we get a synchronous machine check + * (e.g., one caused by execution of this instruction). This + * will devolve into a panic if we try to kill init or are in + * an interrupt etc. + * + * TODO: Queue up this address for hwpoisioning later. + * TODO: This is not quite right for d-side machine + * checks ->nip is not necessarily the important + * address. + */ + if ((user_mode(regs))) { + _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); + recovered = 1; + } else if (die_will_crash()) { + /* + * die() would kill the kernel, so better to go via + * the platform reboot code that will log the + * machine check. + */ + recovered = 0; + } else { + die_mce("Machine check", regs, SIGBUS); + recovered = 1; + } + } + + return recovered; +} + +/* + * Handle a machine check. + * + * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) + * should be present. If so the handler which called us tells us if the + * error was recovered (never true if RI=0). + * + * On hardware prior to Power 4 these exceptions were asynchronous which + * means we can't tell exactly where it occurred and so we can't recover. + */ +int pSeries_machine_check_exception(struct pt_regs *regs) +{ + struct machine_check_event evt; + + if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) + return 0; + + /* Print things out */ + if (evt.version != MCE_V1) { + pr_err("Machine Check Exception, Unknown event version %d !\n", + evt.version); + return 0; + } + machine_check_print_event_info(&evt, user_mode(regs), false); + + if (recover_mce(regs, &evt)) + return 1; + + return 0; +} + +long pseries_machine_check_realmode(struct pt_regs *regs) +{ + struct rtas_error_log *errp; + int disposition; + + if (fwnmi_active) { + errp = fwnmi_get_errinfo(regs); + /* + * Call to fwnmi_release_errinfo() in real mode causes kernel + * to panic. Hence we will call it as soon as we go into + * virtual mode. + */ + disposition = mce_handle_error(regs, errp); + + fwnmi_release_errinfo(); + + if (disposition == RTAS_DISP_FULLY_RECOVERED) + return 1; + } + + return 0; +} diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c new file mode 100644 index 0000000000..599bd2c785 --- /dev/null +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI + * Hotplug and Dynamic Logical Partitioning on RPA platforms). + * + * Copyright (C) 2005 Nathan Lynch + * Copyright (C) 2005 IBM Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "of_helpers.h" + +static int pSeries_reconfig_add_node(const char *path, struct property *proplist) +{ + struct device_node *np; + int err = -ENOMEM; + + np = kzalloc(sizeof(*np), GFP_KERNEL); + if (!np) + goto out_err; + + np->full_name = kstrdup(kbasename(path), GFP_KERNEL); + if (!np->full_name) + goto out_err; + + np->properties = proplist; + of_node_set_flag(np, OF_DYNAMIC); + of_node_init(np); + + np->parent = pseries_of_derive_parent(path); + if (IS_ERR(np->parent)) { + err = PTR_ERR(np->parent); + goto out_err; + } + + err = of_attach_node(np); + if (err) { + printk(KERN_ERR "Failed to add device node %s\n", path); + goto out_err; + } + + of_node_put(np->parent); + + return 0; + +out_err: + if (np) { + of_node_put(np->parent); + kfree(np->full_name); + kfree(np); + } + return err; +} + +static int pSeries_reconfig_remove_node(struct device_node *np) +{ + struct device_node *parent, *child; + + parent = of_get_parent(np); + if (!parent) + return -EINVAL; + + if ((child = of_get_next_child(np, NULL))) { + of_node_put(child); + of_node_put(parent); + return -EBUSY; + } + + of_detach_node(np); + of_node_put(parent); + return 0; +} + +/* + * /proc/powerpc/ofdt - yucky binary interface for adding and removing + * OF device nodes. Should be deprecated as soon as we get an + * in-kernel wrapper for the RTAS ibm,configure-connector call. + */ + +static void release_prop_list(const struct property *prop) +{ + struct property *next; + for (; prop; prop = next) { + next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + } + +} + +/** + * parse_next_property - process the next property from raw input buffer + * @buf: input buffer, must be nul-terminated + * @end: end of the input buffer + 1, for validation + * @name: return value; set to property name in buf + * @length: return value; set to length of value + * @value: return value; set to the property value in buf + * + * Note that the caller must make copies of the name and value returned, + * this function does no allocation or copying of the data. Return value + * is set to the next name in buf, or NULL on error. + */ +static char * parse_next_property(char *buf, char *end, char **name, int *length, + unsigned char **value) +{ + char *tmp; + + *name = buf; + + tmp = strchr(buf, ' '); + if (!tmp) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + *tmp = '\0'; + + if (++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + + /* now we're on the length */ + *length = -1; + *length = simple_strtoul(tmp, &tmp, 10); + if (*length == -1) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + if (*tmp != ' ' || ++tmp >= end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + + /* now we're on the value */ + *value = tmp; + tmp += *length; + if (tmp > end) { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + else if (tmp < end && *tmp != ' ' && *tmp != '\0') { + printk(KERN_ERR "property parse failed in %s at line %d\n", + __func__, __LINE__); + return NULL; + } + tmp++; + + /* and now we should be on the next name, or the end */ + return tmp; +} + +static struct property *new_property(const char *name, const int length, + const unsigned char *value, struct property *last) +{ + struct property *new = kzalloc(sizeof(*new), GFP_KERNEL); + + if (!new) + return NULL; + + if (!(new->name = kstrdup(name, GFP_KERNEL))) + goto cleanup; + if (!(new->value = kmalloc(length + 1, GFP_KERNEL))) + goto cleanup; + + memcpy(new->value, value, length); + *(((char *)new->value) + length) = 0; + new->length = length; + new->next = last; + return new; + +cleanup: + kfree(new->name); + kfree(new->value); + kfree(new); + return NULL; +} + +static int do_add_node(char *buf, size_t bufsize) +{ + char *path, *end, *name; + struct device_node *np; + struct property *prop = NULL; + unsigned char* value; + int length, rv = 0; + + end = buf + bufsize; + path = buf; + buf = strchr(buf, ' '); + if (!buf) + return -EINVAL; + *buf = '\0'; + buf++; + + if ((np = of_find_node_by_path(path))) { + of_node_put(np); + return -EINVAL; + } + + /* rv = build_prop_list(tmp, bufsize - (tmp - buf), &proplist); */ + while (buf < end && + (buf = parse_next_property(buf, end, &name, &length, &value))) { + struct property *last = prop; + + prop = new_property(name, length, value, last); + if (!prop) { + rv = -ENOMEM; + prop = last; + goto out; + } + } + if (!buf) { + rv = -EINVAL; + goto out; + } + + rv = pSeries_reconfig_add_node(path, prop); + +out: + if (rv) + release_prop_list(prop); + return rv; +} + +static int do_remove_node(char *buf) +{ + struct device_node *node; + int rv = -ENODEV; + + if ((node = of_find_node_by_path(buf))) + rv = pSeries_reconfig_remove_node(node); + + of_node_put(node); + return rv; +} + +static char *parse_node(char *buf, size_t bufsize, struct device_node **npp) +{ + char *handle_str; + phandle handle; + *npp = NULL; + + handle_str = buf; + + buf = strchr(buf, ' '); + if (!buf) + return NULL; + *buf = '\0'; + buf++; + + handle = simple_strtoul(handle_str, NULL, 0); + + *npp = of_find_node_by_phandle(handle); + return buf; +} + +static int do_add_property(char *buf, size_t bufsize) +{ + struct property *prop = NULL; + struct device_node *np; + unsigned char *value; + char *name, *end; + int length; + end = buf + bufsize; + buf = parse_node(buf, bufsize, &np); + + if (!np) + return -ENODEV; + + if (parse_next_property(buf, end, &name, &length, &value) == NULL) + return -EINVAL; + + prop = new_property(name, length, value, NULL); + if (!prop) + return -ENOMEM; + + of_add_property(np, prop); + + return 0; +} + +static int do_remove_property(char *buf, size_t bufsize) +{ + struct device_node *np; + char *tmp; + buf = parse_node(buf, bufsize, &np); + + if (!np) + return -ENODEV; + + tmp = strchr(buf,' '); + if (tmp) + *tmp = '\0'; + + if (strlen(buf) == 0) + return -EINVAL; + + return of_remove_property(np, of_find_property(np, buf, NULL)); +} + +static int do_update_property(char *buf, size_t bufsize) +{ + struct device_node *np; + unsigned char *value; + char *name, *end, *next_prop; + int length; + struct property *newprop; + buf = parse_node(buf, bufsize, &np); + end = buf + bufsize; + + if (!np) + return -ENODEV; + + next_prop = parse_next_property(buf, end, &name, &length, &value); + if (!next_prop) + return -EINVAL; + + if (!strlen(name)) + return -ENODEV; + + newprop = new_property(name, length, value, NULL); + if (!newprop) + return -ENOMEM; + + if (!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size")) + slb_set_size(*(int *)value); + + return of_update_property(np, newprop); +} + +/** + * ofdt_write - perform operations on the Open Firmware device tree + * + * @file: not used + * @buf: command and arguments + * @count: size of the command buffer + * @off: not used + * + * Operations supported at this time are addition and removal of + * whole nodes along with their properties. Operations on individual + * properties are not implemented (yet). + */ +static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, + loff_t *off) +{ + int rv; + char *kbuf; + char *tmp; + + rv = security_locked_down(LOCKDOWN_DEVICE_TREE); + if (rv) + return rv; + + kbuf = memdup_user_nul(buf, count); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + tmp = strchr(kbuf, ' '); + if (!tmp) { + rv = -EINVAL; + goto out; + } + *tmp = '\0'; + tmp++; + + if (!strcmp(kbuf, "add_node")) + rv = do_add_node(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_node")) + rv = do_remove_node(tmp); + else if (!strcmp(kbuf, "add_property")) + rv = do_add_property(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "remove_property")) + rv = do_remove_property(tmp, count - (tmp - kbuf)); + else if (!strcmp(kbuf, "update_property")) + rv = do_update_property(tmp, count - (tmp - kbuf)); + else + rv = -EINVAL; +out: + kfree(kbuf); + return rv ? rv : count; +} + +static const struct proc_ops ofdt_proc_ops = { + .proc_write = ofdt_write, + .proc_lseek = noop_llseek, +}; + +/* create /proc/powerpc/ofdt write-only by root */ +static int proc_ppc64_create_ofdt(void) +{ + struct proc_dir_entry *ent; + + ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops); + if (ent) + proc_set_size(ent, 0); + + return 0; +} +machine_device_initcall(pseries, proc_ppc64_create_ofdt); diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c new file mode 100644 index 0000000000..6ddfdeaace --- /dev/null +++ b/arch/powerpc/platforms/pseries/rng.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +#define pr_fmt(fmt) "pseries-rng: " fmt + +#include +#include +#include +#include +#include +#include "pseries.h" + + +static int pseries_get_random_long(unsigned long *v) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_RANDOM, retbuf) == H_SUCCESS) { + *v = retbuf[0]; + return 1; + } + + return 0; +} + +void __init pseries_rng_init(void) +{ + struct device_node *dn; + + dn = of_find_compatible_node(NULL, NULL, "ibm,random"); + if (!dn) + return; + ppc_md.get_random_seed = pseries_get_random_long; + of_node_put(dn); +} diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c new file mode 100644 index 0000000000..b5853e9fcc --- /dev/null +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Firmware-Assisted Dump support on POWERVM platform. + * + * Copyright 2011, Mahesh Salgaonkar, IBM Corporation. + * Copyright 2019, Hari Bathini, IBM Corporation. + */ + +#define pr_fmt(fmt) "rtas fadump: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "rtas-fadump.h" + +static struct rtas_fadump_mem_struct fdm; +static const struct rtas_fadump_mem_struct *fdm_active; + +static void rtas_fadump_update_config(struct fw_dump *fadump_conf, + const struct rtas_fadump_mem_struct *fdm) +{ + fadump_conf->boot_mem_dest_addr = + be64_to_cpu(fdm->rmr_region.destination_address); + + fadump_conf->fadumphdr_addr = (fadump_conf->boot_mem_dest_addr + + fadump_conf->boot_memory_size); +} + +/* + * This function is called in the capture kernel to get configuration details + * setup in the first kernel and passed to the f/w. + */ +static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, + const struct rtas_fadump_mem_struct *fdm) +{ + fadump_conf->boot_mem_addr[0] = + be64_to_cpu(fdm->rmr_region.source_address); + fadump_conf->boot_mem_sz[0] = be64_to_cpu(fdm->rmr_region.source_len); + fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0]; + + fadump_conf->boot_mem_top = fadump_conf->boot_memory_size; + fadump_conf->boot_mem_regs_cnt = 1; + + /* + * Start address of reserve dump area (permanent reservation) for + * re-registering FADump after dump capture. + */ + fadump_conf->reserve_dump_area_start = + be64_to_cpu(fdm->cpu_state_data.destination_address); + + rtas_fadump_update_config(fadump_conf, fdm); +} + +static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf) +{ + u64 addr = fadump_conf->reserve_dump_area_start; + + memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct)); + addr = addr & PAGE_MASK; + + fdm.header.dump_format_version = cpu_to_be32(0x00000001); + fdm.header.dump_num_sections = cpu_to_be16(3); + fdm.header.dump_status_flag = 0; + fdm.header.offset_first_dump_section = + cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct, + cpu_state_data)); + + /* + * Fields for disk dump option. + * We are not using disk dump option, hence set these fields to 0. + */ + fdm.header.dd_block_size = 0; + fdm.header.dd_block_offset = 0; + fdm.header.dd_num_blocks = 0; + fdm.header.dd_offset_disk_path = 0; + + /* set 0 to disable an automatic dump-reboot. */ + fdm.header.max_time_auto = 0; + + /* Kernel dump sections */ + /* cpu state data section. */ + fdm.cpu_state_data.request_flag = + cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.cpu_state_data.source_data_type = + cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA); + fdm.cpu_state_data.source_address = 0; + fdm.cpu_state_data.source_len = + cpu_to_be64(fadump_conf->cpu_state_data_size); + fdm.cpu_state_data.destination_address = cpu_to_be64(addr); + addr += fadump_conf->cpu_state_data_size; + + /* hpte region section */ + fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.hpte_region.source_data_type = + cpu_to_be16(RTAS_FADUMP_HPTE_REGION); + fdm.hpte_region.source_address = 0; + fdm.hpte_region.source_len = + cpu_to_be64(fadump_conf->hpte_region_size); + fdm.hpte_region.destination_address = cpu_to_be64(addr); + addr += fadump_conf->hpte_region_size; + + /* + * Align boot memory area destination address to page boundary to + * be able to mmap read this area in the vmcore. + */ + addr = PAGE_ALIGN(addr); + + /* RMA region section */ + fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG); + fdm.rmr_region.source_data_type = + cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION); + fdm.rmr_region.source_address = cpu_to_be64(0); + fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size); + fdm.rmr_region.destination_address = cpu_to_be64(addr); + addr += fadump_conf->boot_memory_size; + + rtas_fadump_update_config(fadump_conf, &fdm); + + return addr; +} + +static u64 rtas_fadump_get_bootmem_min(void) +{ + return RTAS_FADUMP_MIN_BOOT_MEM; +} + +static int rtas_fadump_register(struct fw_dump *fadump_conf) +{ + unsigned int wait_time; + int rc, err = -EIO; + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, + NULL, FADUMP_REGISTER, &fdm, + sizeof(struct rtas_fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + + } while (wait_time); + + switch (rc) { + case 0: + pr_info("Registration is successful!\n"); + fadump_conf->dump_registered = 1; + err = 0; + break; + case -1: + pr_err("Failed to register. Hardware Error(%d).\n", rc); + break; + case -3: + if (!is_fadump_boot_mem_contiguous()) + pr_err("Can't have holes in boot memory area.\n"); + else if (!is_fadump_reserved_mem_contiguous()) + pr_err("Can't have holes in reserved memory area.\n"); + + pr_err("Failed to register. Parameter Error(%d).\n", rc); + err = -EINVAL; + break; + case -9: + pr_err("Already registered!\n"); + fadump_conf->dump_registered = 1; + err = -EEXIST; + break; + default: + pr_err("Failed to register. Unknown Error(%d).\n", rc); + break; + } + + return err; +} + +static int rtas_fadump_unregister(struct fw_dump *fadump_conf) +{ + unsigned int wait_time; + int rc; + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, + NULL, FADUMP_UNREGISTER, &fdm, + sizeof(struct rtas_fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + } while (wait_time); + + if (rc) { + pr_err("Failed to un-register - unexpected error(%d).\n", rc); + return -EIO; + } + + fadump_conf->dump_registered = 0; + return 0; +} + +static int rtas_fadump_invalidate(struct fw_dump *fadump_conf) +{ + unsigned int wait_time; + int rc; + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1, + NULL, FADUMP_INVALIDATE, fdm_active, + sizeof(struct rtas_fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + } while (wait_time); + + if (rc) { + pr_err("Failed to invalidate - unexpected error (%d).\n", rc); + return -EIO; + } + + fadump_conf->dump_active = 0; + fdm_active = NULL; + return 0; +} + +#define RTAS_FADUMP_GPR_MASK 0xffffff0000000000 +static inline int rtas_fadump_gpr_index(u64 id) +{ + char str[3]; + int i = -1; + + if ((id & RTAS_FADUMP_GPR_MASK) == fadump_str_to_u64("GPR")) { + /* get the digits at the end */ + id &= ~RTAS_FADUMP_GPR_MASK; + id >>= 24; + str[2] = '\0'; + str[1] = id & 0xff; + str[0] = (id >> 8) & 0xff; + if (kstrtoint(str, 10, &i)) + i = -EINVAL; + if (i > 31) + i = -1; + } + return i; +} + +static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +{ + int i; + + i = rtas_fadump_gpr_index(reg_id); + if (i >= 0) + regs->gpr[i] = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("NIA")) + regs->nip = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("MSR")) + regs->msr = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("CTR")) + regs->ctr = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("LR")) + regs->link = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("XER")) + regs->xer = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("CR")) + regs->ccr = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("DAR")) + regs->dar = (unsigned long)reg_val; + else if (reg_id == fadump_str_to_u64("DSISR")) + regs->dsisr = (unsigned long)reg_val; +} + +static struct rtas_fadump_reg_entry* __init +rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry, + struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + + while (be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND")) { + rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), + be64_to_cpu(reg_entry->reg_value)); + reg_entry++; + } + reg_entry++; + return reg_entry; +} + +/* + * Read CPU state dump data and convert it into ELF notes. + * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be + * used to access the data to allow for additional fields to be added without + * affecting compatibility. Each list of registers for a CPU starts with + * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, + * 8 Byte ASCII identifier and 8 Byte register value. The register entry + * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part + * of register value. For more details refer to PAPR document. + * + * Only for the crashing cpu we ignore the CPU dump data and get exact + * state from fadump crash info structure populated by first kernel at the + * time of crash. + */ +static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf) +{ + struct rtas_fadump_reg_save_area_header *reg_header; + struct fadump_crash_info_header *fdh = NULL; + struct rtas_fadump_reg_entry *reg_entry; + u32 num_cpus, *note_buf; + int i, rc = 0, cpu = 0; + struct pt_regs regs; + unsigned long addr; + void *vaddr; + + addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address); + vaddr = __va(addr); + + reg_header = vaddr; + if (be64_to_cpu(reg_header->magic_number) != + fadump_str_to_u64("REGSAVE")) { + pr_err("Unable to read register save area.\n"); + return -ENOENT; + } + + pr_debug("--------CPU State Data------------\n"); + pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); + pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); + + vaddr += be32_to_cpu(reg_header->num_cpu_offset); + num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); + pr_debug("NumCpus : %u\n", num_cpus); + vaddr += sizeof(u32); + reg_entry = (struct rtas_fadump_reg_entry *)vaddr; + + rc = fadump_setup_cpu_notes_buf(num_cpus); + if (rc != 0) + return rc; + + note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr; + + if (fadump_conf->fadumphdr_addr) + fdh = __va(fadump_conf->fadumphdr_addr); + + for (i = 0; i < num_cpus; i++) { + if (be64_to_cpu(reg_entry->reg_id) != + fadump_str_to_u64("CPUSTRT")) { + pr_err("Unable to read CPU state data\n"); + rc = -ENOENT; + goto error_out; + } + /* Lower 4 bytes of reg_value contains logical cpu id */ + cpu = (be64_to_cpu(reg_entry->reg_value) & + RTAS_FADUMP_CPU_ID_MASK); + if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_mask)) { + RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); + continue; + } + pr_debug("Reading register data for cpu %d...\n", cpu); + if (fdh && fdh->crashing_cpu == cpu) { + regs = fdh->regs; + note_buf = fadump_regs_to_elf_notes(note_buf, ®s); + RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry); + } else { + reg_entry++; + reg_entry = rtas_fadump_read_regs(reg_entry, ®s); + note_buf = fadump_regs_to_elf_notes(note_buf, ®s); + } + } + final_note(note_buf); + + if (fdh) { + pr_debug("Updating elfcore header (%llx) with cpu notes\n", + fdh->elfcorehdr_addr); + fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr)); + } + return 0; + +error_out: + fadump_free_cpu_notes_buf(); + return rc; + +} + +/* + * Validate and process the dump data stored by firmware before exporting + * it through '/proc/vmcore'. + */ +static int __init rtas_fadump_process(struct fw_dump *fadump_conf) +{ + struct fadump_crash_info_header *fdh; + int rc = 0; + + if (!fdm_active || !fadump_conf->fadumphdr_addr) + return -EINVAL; + + /* Check if the dump data is valid. */ + if ((be16_to_cpu(fdm_active->header.dump_status_flag) == + RTAS_FADUMP_ERROR_FLAG) || + (fdm_active->cpu_state_data.error_flags != 0) || + (fdm_active->rmr_region.error_flags != 0)) { + pr_err("Dump taken by platform is not valid\n"); + return -EINVAL; + } + if ((fdm_active->rmr_region.bytes_dumped != + fdm_active->rmr_region.source_len) || + !fdm_active->cpu_state_data.bytes_dumped) { + pr_err("Dump taken by platform is incomplete\n"); + return -EINVAL; + } + + /* Validate the fadump crash info header */ + fdh = __va(fadump_conf->fadumphdr_addr); + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + pr_err("Crash info header is not valid.\n"); + return -EINVAL; + } + + rc = rtas_fadump_build_cpu_notes(fadump_conf); + if (rc) + return rc; + + /* + * We are done validating dump info and elfcore header is now ready + * to be exported. set elfcorehdr_addr so that vmcore module will + * export the elfcore header through '/proc/vmcore'. + */ + elfcorehdr_addr = fdh->elfcorehdr_addr; + + return 0; +} + +static void rtas_fadump_region_show(struct fw_dump *fadump_conf, + struct seq_file *m) +{ + const struct rtas_fadump_section *cpu_data_section; + const struct rtas_fadump_mem_struct *fdm_ptr; + + if (fdm_active) + fdm_ptr = fdm_active; + else + fdm_ptr = &fdm; + + cpu_data_section = &(fdm_ptr->cpu_state_data); + seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", + be64_to_cpu(cpu_data_section->destination_address), + be64_to_cpu(cpu_data_section->destination_address) + + be64_to_cpu(cpu_data_section->source_len) - 1, + be64_to_cpu(cpu_data_section->source_len), + be64_to_cpu(cpu_data_section->bytes_dumped)); + + seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n", + be64_to_cpu(fdm_ptr->hpte_region.destination_address), + be64_to_cpu(fdm_ptr->hpte_region.destination_address) + + be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, + be64_to_cpu(fdm_ptr->hpte_region.source_len), + be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); + + seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", + be64_to_cpu(fdm_ptr->rmr_region.source_address), + be64_to_cpu(fdm_ptr->rmr_region.destination_address)); + seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", + be64_to_cpu(fdm_ptr->rmr_region.source_len), + be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); + + /* Dump is active. Show preserved area start address. */ + if (fdm_active) { + seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n", + fadump_conf->boot_mem_top); + } +} + +static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh, + const char *msg) +{ + /* Call ibm,os-term rtas call to trigger firmware assisted dump */ + rtas_os_term((char *)msg); +} + +static struct fadump_ops rtas_fadump_ops = { + .fadump_init_mem_struct = rtas_fadump_init_mem_struct, + .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min, + .fadump_register = rtas_fadump_register, + .fadump_unregister = rtas_fadump_unregister, + .fadump_invalidate = rtas_fadump_invalidate, + .fadump_process = rtas_fadump_process, + .fadump_region_show = rtas_fadump_region_show, + .fadump_trigger = rtas_fadump_trigger, +}; + +void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) +{ + int i, size, num_sections; + const __be32 *sections; + const __be32 *token; + + /* + * Check if Firmware Assisted dump is supported. if yes, check + * if dump has been initiated on last reboot. + */ + token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); + if (!token) + return; + + fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token); + fadump_conf->ops = &rtas_fadump_ops; + fadump_conf->fadump_supported = 1; + + /* Firmware supports 64-bit value for size, align it to pagesize. */ + fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE); + + /* + * The 'ibm,kernel-dump' rtas node is present only if there is + * dump data waiting for us. + */ + fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); + if (fdm_active) { + pr_info("Firmware-assisted dump is active.\n"); + fadump_conf->dump_active = 1; + rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active)); + } + + /* Get the sizes required to store dump data for the firmware provided + * dump sections. + * For each dump section type supported, a 32bit cell which defines + * the ID of a supported section followed by two 32 bit cells which + * gives the size of the section in bytes. + */ + sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", + &size); + + if (!sections) + return; + + num_sections = size / (3 * sizeof(u32)); + + for (i = 0; i < num_sections; i++, sections += 3) { + u32 type = (u32)of_read_number(sections, 1); + + switch (type) { + case RTAS_FADUMP_CPU_STATE_DATA: + fadump_conf->cpu_state_data_size = + of_read_ulong(§ions[1], 2); + break; + case RTAS_FADUMP_HPTE_REGION: + fadump_conf->hpte_region_size = + of_read_ulong(§ions[1], 2); + break; + } + } +} diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h new file mode 100644 index 0000000000..fd59bd7ca9 --- /dev/null +++ b/arch/powerpc/platforms/pseries/rtas-fadump.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Firmware-Assisted Dump support on POWERVM platform. + * + * Copyright 2011, Mahesh Salgaonkar, IBM Corporation. + * Copyright 2019, Hari Bathini, IBM Corporation. + */ + +#ifndef _PSERIES_RTAS_FADUMP_H +#define _PSERIES_RTAS_FADUMP_H + +/* + * On some Power systems where RMO is 128MB, it still requires minimum of + * 256MB for kernel to boot successfully. When kdump infrastructure is + * configured to save vmcore over network, we run into OOM issue while + * loading modules related to network setup. Hence we need additional 64M + * of memory to avoid OOM issue. + */ +#define RTAS_FADUMP_MIN_BOOT_MEM ((0x1UL << 28) + (0x1UL << 26)) + +/* Firmware provided dump sections */ +#define RTAS_FADUMP_CPU_STATE_DATA 0x0001 +#define RTAS_FADUMP_HPTE_REGION 0x0002 +#define RTAS_FADUMP_REAL_MODE_REGION 0x0011 + +/* Dump request flag */ +#define RTAS_FADUMP_REQUEST_FLAG 0x00000001 + +/* Dump status flag */ +#define RTAS_FADUMP_ERROR_FLAG 0x2000 + +/* Kernel Dump section info */ +struct rtas_fadump_section { + __be32 request_flag; + __be16 source_data_type; + __be16 error_flags; + __be64 source_address; + __be64 source_len; + __be64 bytes_dumped; + __be64 destination_address; +}; + +/* ibm,configure-kernel-dump header. */ +struct rtas_fadump_section_header { + __be32 dump_format_version; + __be16 dump_num_sections; + __be16 dump_status_flag; + __be32 offset_first_dump_section; + + /* Fields for disk dump option. */ + __be32 dd_block_size; + __be64 dd_block_offset; + __be64 dd_num_blocks; + __be32 dd_offset_disk_path; + + /* Maximum time allowed to prevent an automatic dump-reboot. */ + __be32 max_time_auto; +}; + +/* + * Firmware Assisted dump memory structure. This structure is required for + * registering future kernel dump with power firmware through rtas call. + * + * No disk dump option. Hence disk dump path string section is not included. + */ +struct rtas_fadump_mem_struct { + struct rtas_fadump_section_header header; + + /* Kernel dump sections */ + struct rtas_fadump_section cpu_state_data; + struct rtas_fadump_section hpte_region; + + /* + * TODO: Extend multiple boot memory regions support in the kernel + * for this platform. + */ + struct rtas_fadump_section rmr_region; +}; + +/* + * The firmware-assisted dump format. + * + * The register save area is an area in the partition's memory used to preserve + * the register contents (CPU state data) for the active CPUs during a firmware + * assisted dump. The dump format contains register save area header followed + * by register entries. Each list of registers for a CPU starts with "CPUSTRT" + * and ends with "CPUEND". + */ + +/* Register save area header. */ +struct rtas_fadump_reg_save_area_header { + __be64 magic_number; + __be32 version; + __be32 num_cpu_offset; +}; + +/* Register entry. */ +struct rtas_fadump_reg_entry { + __be64 reg_id; + __be64 reg_value; +}; + +/* Utility macros */ +#define RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry) \ +({ \ + while (be64_to_cpu(reg_entry->reg_id) != \ + fadump_str_to_u64("CPUEND")) \ + reg_entry++; \ + reg_entry++; \ +}) + +#define RTAS_FADUMP_CPU_ID_MASK ((1UL << 32) - 1) + +#endif /* _PSERIES_RTAS_FADUMP_H */ diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c new file mode 100644 index 0000000000..b37d52f403 --- /dev/null +++ b/arch/powerpc/platforms/pseries/rtas-work-area.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "rtas-work-area: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +enum { + /* + * Ensure the pool is page-aligned. + */ + RTAS_WORK_AREA_ARENA_ALIGN = PAGE_SIZE, + /* + * Don't let a single allocation claim the whole arena. + */ + RTAS_WORK_AREA_ARENA_SZ = RTAS_WORK_AREA_MAX_ALLOC_SZ * 2, + /* + * The smallest known work area size is for ibm,get-vpd's + * location code argument, which is limited to 79 characters + * plus 1 nul terminator. + * + * PAPR+ 7.3.20 ibm,get-vpd RTAS Call + * PAPR+ 12.3.2.4 Converged Location Code Rules - Length Restrictions + */ + RTAS_WORK_AREA_MIN_ALLOC_SZ = roundup_pow_of_two(80), +}; + +static struct { + struct gen_pool *gen_pool; + char *arena; + struct mutex mutex; /* serializes allocations */ + struct wait_queue_head wqh; + mempool_t descriptor_pool; + bool available; +} rwa_state = { + .mutex = __MUTEX_INITIALIZER(rwa_state.mutex), + .wqh = __WAIT_QUEUE_HEAD_INITIALIZER(rwa_state.wqh), +}; + +/* + * A single work area buffer and descriptor to serve requests early in + * boot before the allocator is fully initialized. We know 4KB is the + * most any boot time user needs (they all call ibm,get-system-parameter). + */ +static bool early_work_area_in_use __initdata; +static char early_work_area_buf[SZ_4K] __initdata __aligned(SZ_4K); +static struct rtas_work_area early_work_area __initdata = { + .buf = early_work_area_buf, + .size = sizeof(early_work_area_buf), +}; + + +static struct rtas_work_area * __init rtas_work_area_alloc_early(size_t size) +{ + WARN_ON(size > early_work_area.size); + WARN_ON(early_work_area_in_use); + early_work_area_in_use = true; + memset(early_work_area.buf, 0, early_work_area.size); + return &early_work_area; +} + +static void __init rtas_work_area_free_early(struct rtas_work_area *work_area) +{ + WARN_ON(work_area != &early_work_area); + WARN_ON(!early_work_area_in_use); + early_work_area_in_use = false; +} + +struct rtas_work_area * __ref __rtas_work_area_alloc(size_t size) +{ + struct rtas_work_area *area; + unsigned long addr; + + might_sleep(); + + /* + * The rtas_work_area_alloc() wrapper enforces this at build + * time. Requests that exceed the arena size will block + * indefinitely. + */ + WARN_ON(size > RTAS_WORK_AREA_MAX_ALLOC_SZ); + + if (!rwa_state.available) + return rtas_work_area_alloc_early(size); + /* + * To ensure FCFS behavior and prevent a high rate of smaller + * requests from starving larger ones, use the mutex to queue + * allocations. + */ + mutex_lock(&rwa_state.mutex); + wait_event(rwa_state.wqh, + (addr = gen_pool_alloc(rwa_state.gen_pool, size)) != 0); + mutex_unlock(&rwa_state.mutex); + + area = mempool_alloc(&rwa_state.descriptor_pool, GFP_KERNEL); + area->buf = (char *)addr; + area->size = size; + + return area; +} + +void __ref rtas_work_area_free(struct rtas_work_area *area) +{ + if (!rwa_state.available) { + rtas_work_area_free_early(area); + return; + } + + gen_pool_free(rwa_state.gen_pool, (unsigned long)area->buf, area->size); + mempool_free(area, &rwa_state.descriptor_pool); + wake_up(&rwa_state.wqh); +} + +/* + * Initialization of the work area allocator happens in two parts. To + * reliably reserve an arena that satisfies RTAS addressing + * requirements, we must perform a memblock allocation early, + * immmediately after RTAS instantiation. Then we have to wait until + * the slab allocator is up before setting up the descriptor mempool + * and adding the arena to a gen_pool. + */ +static __init int rtas_work_area_allocator_init(void) +{ + const unsigned int order = ilog2(RTAS_WORK_AREA_MIN_ALLOC_SZ); + const phys_addr_t pa_start = __pa(rwa_state.arena); + const phys_addr_t pa_end = pa_start + RTAS_WORK_AREA_ARENA_SZ - 1; + struct gen_pool *pool; + const int nid = NUMA_NO_NODE; + int err; + + err = -ENOMEM; + if (!rwa_state.arena) + goto err_out; + + pool = gen_pool_create(order, nid); + if (!pool) + goto err_out; + /* + * All RTAS functions that consume work areas are OK with + * natural alignment, when they have alignment requirements at + * all. + */ + gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL); + + err = gen_pool_add(pool, (unsigned long)rwa_state.arena, + RTAS_WORK_AREA_ARENA_SZ, nid); + if (err) + goto err_destroy; + + err = mempool_init_kmalloc_pool(&rwa_state.descriptor_pool, 1, + sizeof(struct rtas_work_area)); + if (err) + goto err_destroy; + + rwa_state.gen_pool = pool; + rwa_state.available = true; + + pr_debug("arena [%pa-%pa] (%uK), min/max alloc sizes %u/%u\n", + &pa_start, &pa_end, + RTAS_WORK_AREA_ARENA_SZ / SZ_1K, + RTAS_WORK_AREA_MIN_ALLOC_SZ, + RTAS_WORK_AREA_MAX_ALLOC_SZ); + + return 0; + +err_destroy: + gen_pool_destroy(pool); +err_out: + return err; +} +machine_arch_initcall(pseries, rtas_work_area_allocator_init); + +/** + * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas. + */ +void __init rtas_work_area_reserve_arena(const phys_addr_t limit) +{ + const phys_addr_t align = RTAS_WORK_AREA_ARENA_ALIGN; + const phys_addr_t size = RTAS_WORK_AREA_ARENA_SZ; + const phys_addr_t min = MEMBLOCK_LOW_LIMIT; + const int nid = NUMA_NO_NODE; + + /* + * Too early for a machine_is(pseries) check. But PAPR + * effectively mandates that ibm,get-system-parameter is + * present: + * + * R1–7.3.16–1. All platforms must support the System + * Parameters option. + * + * So set up the arena if we find that, with a fallback to + * ibm,configure-connector, just in case. + */ + if (rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER) || + rtas_function_implemented(RTAS_FN_IBM_CONFIGURE_CONNECTOR)) + rwa_state.arena = memblock_alloc_try_nid(size, align, min, limit, nid); +} diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c new file mode 100644 index 0000000000..ecea85c74c --- /dev/null +++ b/arch/powerpc/platforms/pseries/setup.c @@ -0,0 +1,1162 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * 64-bit pSeries and RS/6000 setup code. + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + */ + +/* + * bootup setup stuff.. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +DEFINE_STATIC_KEY_FALSE(shared_processor); +EXPORT_SYMBOL(shared_processor); + +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); +#endif + +int CMO_PrPSP = -1; +int CMO_SecPSP = -1; +unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); +EXPORT_SYMBOL(CMO_PageSize); + +int fwnmi_active; /* TRUE if an FWNMI handler is present */ +int ibm_nmi_interlock_token; +u32 pseries_security_flavor; + +static void pSeries_show_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: CHRP %s\n", model); + of_node_put(root); + if (radix_enabled()) + seq_printf(m, "MMU\t\t: Radix\n"); + else + seq_printf(m, "MMU\t\t: Hash\n"); +} + +/* Initialize firmware assisted non-maskable interrupts if + * the firmware supports this feature. + */ +static void __init fwnmi_init(void) +{ + unsigned long system_reset_addr, machine_check_addr; + u8 *mce_data_buf; + unsigned int i; + int nr_cpus = num_possible_cpus(); +#ifdef CONFIG_PPC_64S_HASH_MMU + struct slb_entry *slb_ptr; + size_t size; +#endif + int ibm_nmi_register_token; + + ibm_nmi_register_token = rtas_function_token(RTAS_FN_IBM_NMI_REGISTER); + if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE) + return; + + ibm_nmi_interlock_token = rtas_function_token(RTAS_FN_IBM_NMI_INTERLOCK); + if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE)) + return; + + /* If the kernel's not linked at zero we point the firmware at low + * addresses anyway, and use a trampoline to get to the real code. */ + system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START; + machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START; + + if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL, + system_reset_addr, machine_check_addr)) + fwnmi_active = 1; + + /* + * Allocate a chunk for per cpu buffer to hold rtas errorlog. + * It will be used in real mode mce handler, hence it needs to be + * below RMA. + */ + mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus, + RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); + if (!mce_data_buf) + panic("Failed to allocate %d bytes below %pa for MCE buffer\n", + RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size); + + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_data_buf = mce_data_buf + + (RTAS_ERROR_LOG_MAX * i); + } + +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!radix_enabled()) { + /* Allocate per cpu area to save old slb contents during MCE */ + size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; + slb_ptr = memblock_alloc_try_nid_raw(size, + sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT, + ppc64_rma_size, NUMA_NO_NODE); + if (!slb_ptr) + panic("Failed to allocate %zu bytes below %pa for slb area\n", + size, &ppc64_rma_size); + + for_each_possible_cpu(i) + paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i); + } +#endif +} + +/* + * Affix a device for the first timer to the platform bus if + * we have firmware support for the H_WATCHDOG hypercall. + */ +static __init int pseries_wdt_init(void) +{ + if (firmware_has_feature(FW_FEATURE_WATCHDOG)) + platform_device_register_simple("pseries-wdt", 0, NULL, 0); + return 0; +} +machine_subsys_initcall(pseries, pseries_wdt_init); + +static void pseries_8259_cascade(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int cascade_irq = i8259_irq(); + + if (cascade_irq) + generic_handle_irq(cascade_irq); + + chip->irq_eoi(&desc->irq_data); +} + +static void __init pseries_setup_i8259_cascade(void) +{ + struct device_node *np, *old, *found = NULL; + unsigned int cascade; + const u32 *addrp; + unsigned long intack = 0; + int naddr; + + for_each_node_by_type(np, "interrupt-controller") { + if (of_device_is_compatible(np, "chrp,iic")) { + found = np; + break; + } + } + + if (found == NULL) { + printk(KERN_DEBUG "pic: no ISA interrupt controller\n"); + return; + } + + cascade = irq_of_parse_and_map(found, 0); + if (!cascade) { + printk(KERN_ERR "pic: failed to map cascade interrupt"); + return; + } + pr_debug("pic: cascade mapped to irq %d\n", cascade); + + for (old = of_node_get(found); old != NULL ; old = np) { + np = of_get_parent(old); + of_node_put(old); + if (np == NULL) + break; + if (!of_node_name_eq(np, "pci")) + continue; + addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL); + if (addrp == NULL) + continue; + naddr = of_n_addr_cells(np); + intack = addrp[naddr-1]; + if (naddr > 1) + intack |= ((unsigned long)addrp[naddr-2]) << 32; + } + if (intack) + printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack); + i8259_init(found, intack); + of_node_put(found); + irq_set_chained_handler(cascade, pseries_8259_cascade); +} + +static void __init pseries_init_irq(void) +{ + /* Try using a XIVE if available, otherwise use a XICS */ + if (!xive_spapr_init()) { + xics_init(); + pseries_setup_i8259_cascade(); + } +} + +static void pseries_lpar_enable_pmcs(void) +{ + unsigned long set, reset; + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); +} + +static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + struct device_node *parent, *np = rd->dn; + struct pci_dn *pdn; + int err = NOTIFY_OK; + + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + parent = of_get_parent(np); + pdn = parent ? PCI_DN(parent) : NULL; + if (pdn) + pci_add_device_node_info(pdn->phb, np); + + of_node_put(parent); + break; + case OF_RECONFIG_DETACH_NODE: + pdn = PCI_DN(np); + if (pdn) + list_del(&pdn->list); + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pci_dn_reconfig_nb = { + .notifier_call = pci_dn_reconfig_notifier, +}; + +struct kmem_cache *dtl_cache; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +/* + * Allocate space for the dispatch trace log for all possible cpus + * and register the buffers with the hypervisor. This is used for + * computing time stolen by the hypervisor. + */ +static int alloc_dispatch_logs(void) +{ + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + if (!dtl_cache) + return 0; + + alloc_dtl_buffers(0); + + /* Register the DTL for the current (boot) cpu */ + register_dtl_buffer(smp_processor_id()); + + return 0; +} +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +static inline int alloc_dispatch_logs(void) +{ + return 0; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int alloc_dispatch_log_kmem_cache(void) +{ + void (*ctor)(void *) = get_dtl_cache_ctor(); + + dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, ctor); + if (!dtl_cache) { + pr_warn("Failed to create dispatch trace log buffer cache\n"); + pr_warn("Stolen time statistics will be unreliable\n"); + return 0; + } + + return alloc_dispatch_logs(); +} +machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache); + +DEFINE_PER_CPU(u64, idle_spurr_cycles); +DEFINE_PER_CPU(u64, idle_entry_purr_snap); +DEFINE_PER_CPU(u64, idle_entry_spurr_snap); +static void pseries_lpar_idle(void) +{ + /* + * Default handler to go into low thread priority and possibly + * low power mode by ceding processor to hypervisor + */ + + if (!prep_irq_for_idle()) + return; + + /* Indicate to hypervisor that we are idle. */ + pseries_idle_prolog(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); + + pseries_idle_epilog(); +} + +static bool pseries_reloc_on_exception_enabled; + +bool pseries_reloc_on_exception(void) +{ + return pseries_reloc_on_exception_enabled; +} +EXPORT_SYMBOL_GPL(pseries_reloc_on_exception); + +/* + * Enable relocation on during exceptions. This has partition wide scope and + * may take a while to complete, if it takes longer than one second we will + * just give up rather than wasting any more time on this - if that turns out + * to ever be a problem in practice we can move this into a kernel thread to + * finish off the process later in boot. + */ +bool pseries_enable_reloc_on_exc(void) +{ + long rc; + unsigned int delay, total_delay = 0; + + while (1) { + rc = enable_reloc_on_exceptions(); + if (!H_IS_LONG_BUSY(rc)) { + if (rc == H_P2) { + pr_info("Relocation on exceptions not" + " supported\n"); + return false; + } else if (rc != H_SUCCESS) { + pr_warn("Unable to enable relocation" + " on exceptions: %ld\n", rc); + return false; + } + pseries_reloc_on_exception_enabled = true; + return true; + } + + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > 1000) { + pr_warn("Warning: Giving up waiting to enable " + "relocation on exceptions (%u msec)!\n", + total_delay); + return false; + } + + mdelay(delay); + } +} +EXPORT_SYMBOL(pseries_enable_reloc_on_exc); + +void pseries_disable_reloc_on_exc(void) +{ + long rc; + + while (1) { + rc = disable_reloc_on_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc == H_SUCCESS) + pseries_reloc_on_exception_enabled = false; + else + pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n", + rc); +} +EXPORT_SYMBOL(pseries_disable_reloc_on_exc); + +#ifdef __LITTLE_ENDIAN__ +void pseries_big_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_big_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + + /* + * At this point it is unlikely panic() will get anything + * out to the user, since this is called very late in kexec + * but at least this will stop us from continuing on further + * and creating an even more difficult to debug situation. + * + * There is a known problem when kdump'ing, if cpus are offline + * the above call will fail. Rather than panicking again, keep + * going and hope the kdump kernel is also little endian, which + * it usually is. + */ + if (rc && !kdump_in_progress()) + panic("Could not enable big endian exceptions"); +} + +void __init pseries_little_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_little_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc) { + ppc_md.progress("H_SET_MODE LE exception fail", 0); + panic("Could not enable little endian exceptions"); + } +} +#endif + +static void __init pSeries_discover_phbs(void) +{ + struct device_node *node; + struct pci_controller *phb; + struct device_node *root = of_find_node_by_path("/"); + + for_each_child_of_node(root, node) { + if (!of_node_is_type(node, "pci") && + !of_node_is_type(node, "pciex")) + continue; + + phb = pcibios_alloc_controller(node); + if (!phb) + continue; + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, node, 0); + isa_bridge_find_early(phb); + phb->controller_ops = pseries_pci_controller_ops; + + /* create pci_dn's for DT nodes under this PHB */ + pci_devs_phb_init_dynamic(phb); + + pseries_msi_allocate_domains(phb); + } + + of_node_put(root); + + /* + * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties + * in chosen. + */ + of_pci_check_probe_only(); +} + +static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) +{ + /* + * The features below are disabled by default, so we instead look to see + * if firmware has *enabled* them, and set them if so. + */ + if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST); + + if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST) + security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST); + + if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE) + security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE); + + if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. + * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if + * H_CPU_BEHAV_FAVOUR_SECURITY is. + */ + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) { + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + pseries_security_flavor = 0; + } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 1; + else + pseries_security_flavor = 2; + + if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER) + security_ftr_clear(SEC_FTR_STF_BARRIER); + + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + +void pseries_setup_security_mitigations(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* + * Set features to the defaults assumed by init_cpu_char_feature_flags() + * so it can set/clear again any features that might have changed after + * migration, and in case the hypercall fails and it is not even called. + */ + powerpc_security_features = SEC_FTR_DEFAULT; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) + init_cpu_char_feature_flags(&result); + + /* + * We're the guest so this doesn't apply to us, clear it to simplify + * handling of it elsewhere. + */ + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); + + types = L1D_FLUSH_FALLBACK; + + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + types |= L1D_FLUSH_MTTRIG; + + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) + types |= L1D_FLUSH_ORI; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); + + setup_rfi_flush(types, enable); + setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); + + setup_stf_barrier(); +} + +#ifdef CONFIG_PCI_IOV +enum rtas_iov_fw_value_map { + NUM_RES_PROPERTY = 0, /* Number of Resources */ + LOW_INT = 1, /* Lowest 32 bits of Address */ + START_OF_ENTRIES = 2, /* Always start of entry */ + APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */ + WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */ + NEXT_ENTRY = 7 /* Go to next entry on array */ +}; + +enum get_iov_fw_value_index { + BAR_ADDRS = 1, /* Get Bar Address */ + APERTURE_SIZE = 2, /* Get Aperture Size */ + WDW_SIZE = 3 /* Get Window Size */ +}; + +static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno, + enum get_iov_fw_value_index value) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(dev); + int i, num_res, ret = 0; + + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (!indexes) + return 0; + + /* + * First element in the array is the number of Bars + * returned. Search through the list to find the matching + * bar + */ + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + if (resno >= num_res) + return 0; /* or an error */ + + i = START_OF_ENTRIES + NEXT_ENTRY * resno; + switch (value) { + case BAR_ADDRS: + ret = of_read_number(&indexes[i], 2); + break; + case APERTURE_SIZE: + ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2); + break; + case WDW_SIZE: + ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2); + break; + } + + return ret; +} + +static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes) +{ + struct resource *res; + resource_size_t base, size; + int i, r, num_res; + + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + num_res = min_t(int, num_res, PCI_SRIOV_NUM_BARS); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; + i += NEXT_ENTRY, r++) { + res = &dev->resource[r + PCI_IOV_RESOURCES]; + base = of_read_number(&indexes[i], 2); + size = of_read_number(&indexes[i + APERTURE_PROPERTY], 2); + res->flags = pci_parse_of_flags(of_read_number + (&indexes[i + LOW_INT], 1), 0); + res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + } +} + +static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes) +{ + struct resource *res, *root, *conflict; + resource_size_t base, size; + int i, r, num_res; + + /* + * First element in the array is the number of Bars + * returned. Search through the list to find the matching + * bars assign them from firmware into resources structure. + */ + num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1); + for (i = START_OF_ENTRIES, r = 0; r < num_res && r < PCI_SRIOV_NUM_BARS; + i += NEXT_ENTRY, r++) { + res = &dev->resource[r + PCI_IOV_RESOURCES]; + base = of_read_number(&indexes[i], 2); + size = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2); + res->name = pci_name(dev); + res->start = base; + res->end = base + size - 1; + root = &iomem_resource; + dev_dbg(&dev->dev, + "pSeries IOV BAR %d: trying firmware assignment %pR\n", + r + PCI_IOV_RESOURCES, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", + r + PCI_IOV_RESOURCES, res, + conflict->name, conflict); + res->flags |= IORESOURCE_UNSET; + } + } +} + +static void pseries_disable_sriov_resources(struct pci_dev *pdev) +{ + int i; + + pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n"); + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) + pdev->resource[i + PCI_IOV_RESOURCES].flags = 0; +} + +static void pseries_pci_fixup_resources(struct pci_dev *pdev) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(pdev); + + /*Firmware must support open sriov otherwise dont configure*/ + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (indexes) + of_pci_set_vf_bar_size(pdev, indexes); + else + pseries_disable_sriov_resources(pdev); +} + +static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev) +{ + const int *indexes; + struct device_node *dn = pci_device_to_OF_node(pdev); + + if (!pdev->is_physfn) + return; + /*Firmware must support open sriov otherwise don't configure*/ + indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL); + if (indexes) + of_pci_parse_iov_addrs(pdev, indexes); + else + pseries_disable_sriov_resources(pdev); +} + +static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev, + int resno) +{ + const __be32 *reg; + struct device_node *dn = pci_device_to_OF_node(pdev); + + /*Firmware must support open sriov otherwise report regular alignment*/ + reg = of_get_property(dn, "ibm,is-open-sriov-pf", NULL); + if (!reg) + return pci_iov_resource_size(pdev, resno); + + if (!pdev->is_physfn) + return 0; + return pseries_get_iov_fw_value(pdev, + resno - PCI_IOV_RESOURCES, + APERTURE_SIZE); +} +#endif + +static void __init pSeries_setup_arch(void) +{ + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + + /* Discover PIC type and setup ppc_md accordingly */ + smp_init_pseries(); + + // Setup CPU hotplug callbacks + pseries_cpu_hotplug_init(); + + if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE)) + if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) + panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n"); + + + /* openpic global configuration register (64-bit format). */ + /* openpic Interrupt Source Unit pointer (64-bit format). */ + /* python0 facility area (mmio) (64-bit format) REAL address. */ + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + fwnmi_init(); + + pseries_setup_security_mitigations(); + if (!radix_enabled()) + pseries_lpar_read_hblkrm_characteristics(); + + /* By default, only probe PCI (can be overridden by rtas_pci) */ + pci_add_flags(PCI_PROBE_ONLY); + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + of_reconfig_notifier_register(&pci_dn_reconfig_nb); + + pSeries_nvram_init(); + + if (firmware_has_feature(FW_FEATURE_LPAR)) { + vpa_init(boot_cpuid); + + if (lppaca_shared_proc()) { + static_branch_enable(&shared_processor); + pv_spinlocks_init(); +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); +#endif + } + + ppc_md.power_save = pseries_lpar_idle; + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; +#ifdef CONFIG_PCI_IOV + ppc_md.pcibios_fixup_resources = + pseries_pci_fixup_resources; + ppc_md.pcibios_fixup_sriov = + pseries_pci_fixup_iov_resources; + ppc_md.pcibios_iov_resource_alignment = + pseries_pci_iov_resource_alignment; +#endif + } else { + /* No special idle routine */ + ppc_md.enable_pmcs = power4_enable_pmcs; + } + + ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; + pseries_rng_init(); +} + +static void pseries_panic(char *str) +{ + panic_flush_kmsg_end(); + rtas_os_term(str); +} + +static int __init pSeries_init_panel(void) +{ + /* Manually leave the kernel version on the panel. */ +#ifdef __BIG_ENDIAN__ + ppc_md.progress("Linux ppc64\n", 0); +#else + ppc_md.progress("Linux ppc64le\n", 0); +#endif + ppc_md.progress(init_utsname()->version, 0); + + return 0; +} +machine_arch_initcall(pseries, pSeries_init_panel); + +static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx) +{ + return plpar_hcall_norets(H_SET_DABR, dabr); +} + +static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) +{ + /* Have to set at least one bit in the DABRX according to PAPR */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* PAPR says we can only set kernel and user bits */ + dabrx &= DABRX_KERNEL | DABRX_USER; + + return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); +} + +static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx) +{ + /* PAPR says we can't set HYP */ + dawrx &= ~DAWRX_HYP; + + if (nr == 0) + return plpar_set_watchpoint0(dawr, dawrx); + else + return plpar_set_watchpoint1(dawr, dawrx); +} + +#define CMO_CHARACTERISTICS_TOKEN 44 +#define CMO_MAXLENGTH 1026 + +void pSeries_coalesce_init(void) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data)) + powerpc_firmware_features |= FW_FEATURE_XCMO; + else + powerpc_firmware_features &= ~FW_FEATURE_XCMO; +} + +/** + * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, + * handle that here. (Stolen from parse_system_parameter_string) + */ +static void __init pSeries_cmo_feature_init(void) +{ + static struct papr_sysparm_buf buf __initdata; + static_assert(sizeof(buf.val) >= CMO_MAXLENGTH); + char *ptr, *key, *value, *end; + int page_order = IOMMU_PAGE_SHIFT_4K; + + pr_debug(" -> fw_cmo_feature_init()\n"); + + if (papr_sysparm_get(PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS, &buf)) { + pr_debug("CMO not available\n"); + pr_debug(" <- fw_cmo_feature_init()\n"); + return; + } + + end = &buf.val[CMO_MAXLENGTH]; + ptr = &buf.val[0]; + key = value = ptr; + + while (*ptr && (ptr <= end)) { + /* Separate the key and value by replacing '=' with '\0' and + * point the value at the string after the '=' + */ + if (ptr[0] == '=') { + ptr[0] = '\0'; + value = ptr + 1; + } else if (ptr[0] == '\0' || ptr[0] == ',') { + /* Terminate the string containing the key/value pair */ + ptr[0] = '\0'; + + if (key == value) { + pr_debug("Malformed key/value pair\n"); + /* Never found a '=', end processing */ + break; + } + + if (0 == strcmp(key, "CMOPageSize")) + page_order = simple_strtol(value, NULL, 10); + else if (0 == strcmp(key, "PrPSP")) + CMO_PrPSP = simple_strtol(value, NULL, 10); + else if (0 == strcmp(key, "SecPSP")) + CMO_SecPSP = simple_strtol(value, NULL, 10); + value = key = ptr + 1; + } + ptr++; + } + + /* Page size is returned as the power of 2 of the page size, + * convert to the page size in bytes before returning + */ + CMO_PageSize = 1 << page_order; + pr_debug("CMO_PageSize = %lu\n", CMO_PageSize); + + if (CMO_PrPSP != -1 || CMO_SecPSP != -1) { + pr_info("CMO enabled\n"); + pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, + CMO_SecPSP); + powerpc_firmware_features |= FW_FEATURE_CMO; + pSeries_coalesce_init(); + } else + pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, + CMO_SecPSP); + pr_debug(" <- fw_cmo_feature_init()\n"); +} + +static void __init pseries_add_hw_description(void) +{ + struct device_node *dn; + const char *s; + + dn = of_find_node_by_path("/openprom"); + if (dn) { + if (of_property_read_string(dn, "model", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "of:%s ", s); + + of_node_put(dn); + } + + dn = of_find_node_by_path("/hypervisor"); + if (dn) { + if (of_property_read_string(dn, "compatible", &s) == 0) + seq_buf_printf(&ppc_hw_desc, "hv:%s ", s); + + of_node_put(dn); + return; + } + + if (of_property_read_bool(of_root, "ibm,powervm-partition") || + of_property_read_bool(of_root, "ibm,fw-net-version")) + seq_buf_printf(&ppc_hw_desc, "hv:phyp "); +} + +/* + * Early initialization. Relocation is on but do not reference unbolted pages + */ +static void __init pseries_init(void) +{ + pr_debug(" -> pseries_init()\n"); + + pseries_add_hw_description(); + +#ifdef CONFIG_HVC_CONSOLE + if (firmware_has_feature(FW_FEATURE_LPAR)) + hvc_vio_init_early(); +#endif + if (firmware_has_feature(FW_FEATURE_XDABR)) + ppc_md.set_dabr = pseries_set_xdabr; + else if (firmware_has_feature(FW_FEATURE_DABR)) + ppc_md.set_dabr = pseries_set_dabr; + + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + ppc_md.set_dawr = pseries_set_dawr; + + pSeries_cmo_feature_init(); + iommu_init_early_pSeries(); + + pr_debug(" <- pseries_init()\n"); +} + +/** + * pseries_power_off - tell firmware about how to power off the system. + * + * This function calls either the power-off rtas token in normal cases + * or the ibm,power-off-ups token (if present & requested) in case of + * a power failure. If power-off token is used, power on will only be + * possible with power button press. If ibm,power-off-ups token is used + * it will allow auto poweron after power is restored. + */ +static void pseries_power_off(void) +{ + int rc; + int rtas_poweroff_ups_token = rtas_function_token(RTAS_FN_IBM_POWER_OFF_UPS); + + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); + + if (rtas_poweron_auto == 0 || + rtas_poweroff_ups_token == RTAS_UNKNOWN_SERVICE) { + rc = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1); + printk(KERN_INFO "RTAS power-off returned %d\n", rc); + } else { + rc = rtas_call(rtas_poweroff_ups_token, 0, 1, NULL); + printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc); + } + for (;;); +} + +static int __init pSeries_probe(void) +{ + if (!of_node_is_type(of_root, "chrp")) + return 0; + + /* Cell blades firmware claims to be chrp while it's not. Until this + * is fixed, we need to avoid those here. + */ + if (of_machine_is_compatible("IBM,CPBW-1.0") || + of_machine_is_compatible("IBM,CBEA")) + return 0; + + pm_power_off = pseries_power_off; + + pr_debug("Machine is%s LPAR !\n", + (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); + + pseries_init(); + + return 1; +} + +static int pSeries_pci_probe_mode(struct pci_bus *bus) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return PCI_PROBE_DEVTREE; + return PCI_PROBE_NORMAL; +} + +#ifdef CONFIG_MEMORY_HOTPLUG +static unsigned long pseries_memory_block_size(void) +{ + return memory_block_size; +} +#endif + +struct pci_controller_ops pseries_pci_controller_ops = { + .probe_mode = pSeries_pci_probe_mode, +#ifdef CONFIG_SPAPR_TCE_IOMMU + .device_group = pSeries_pci_device_group, +#endif +}; + +define_machine(pseries) { + .name = "pSeries", + .probe = pSeries_probe, + .setup_arch = pSeries_setup_arch, + .init_IRQ = pseries_init_irq, + .show_cpuinfo = pSeries_show_cpuinfo, + .log_error = pSeries_log_error, + .discover_phbs = pSeries_discover_phbs, + .pcibios_fixup = pSeries_final_fixup, + .restart = rtas_restart, + .halt = rtas_halt, + .panic = pseries_panic, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .progress = rtas_progress, + .system_reset_exception = pSeries_system_reset_exception, + .machine_check_early = pseries_machine_check_realmode, + .machine_check_exception = pSeries_machine_check_exception, + .machine_check_log_err = pSeries_machine_check_log_err, +#ifdef CONFIG_KEXEC_CORE + .machine_kexec = pseries_machine_kexec, + .kexec_cpu_down = pseries_kexec_cpu_down, +#endif +#ifdef CONFIG_MEMORY_HOTPLUG + .memory_block_size = pseries_memory_block_size, +#endif +}; diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c new file mode 100644 index 0000000000..c597711ef2 --- /dev/null +++ b/arch/powerpc/platforms/pseries/smp.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SMP support for pSeries machines. + * + * Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * Plus various changes from other IBM teams... + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pseries.h" + +/* + * The Primary thread of each non-boot processor was started from the OF client + * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop. + */ +static cpumask_var_t of_spin_mask; + +/* Query where a cpu is now. Return codes #defined in plpar_wrappers.h */ +int smp_query_cpu_stopped(unsigned int pcpu) +{ + int cpu_status, status; + int qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE); + + if (qcss_tok == RTAS_UNKNOWN_SERVICE) { + printk_once(KERN_INFO + "Firmware doesn't support query-cpu-stopped-state\n"); + return QCSS_HARDWARE_ERROR; + } + + status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu); + if (status != 0) { + printk(KERN_ERR + "RTAS query-cpu-stopped-state failed: %i\n", status); + return status; + } + + return cpu_status; +} + +/** + * smp_startup_cpu() - start the given cpu + * + * At boot time, there is nothing to do for primary threads which were + * started from Open Firmware. For anything else, call RTAS with the + * appropriate start location. + * + * Returns: + * 0 - failure + * 1 - success + */ +static inline int smp_startup_cpu(unsigned int lcpu) +{ + int status; + unsigned long start_here = + __pa(ppc_function_entry(generic_secondary_smp_init)); + unsigned int pcpu; + int start_cpu; + + if (cpumask_test_cpu(lcpu, of_spin_mask)) + /* Already started by OF and sitting in spin loop */ + return 1; + + pcpu = get_hard_smp_processor_id(lcpu); + + /* Check to see if the CPU out of FW already for kexec */ + if (smp_query_cpu_stopped(pcpu) == QCSS_NOT_STOPPED){ + cpumask_set_cpu(lcpu, of_spin_mask); + return 1; + } + + /* + * If the RTAS start-cpu token does not exist then presume the + * cpu is already spinning. + */ + start_cpu = rtas_function_token(RTAS_FN_START_CPU); + if (start_cpu == RTAS_UNKNOWN_SERVICE) + return 1; + + status = rtas_call(start_cpu, 3, 1, NULL, pcpu, start_here, pcpu); + if (status != 0) { + printk(KERN_ERR "start-cpu failed: %i\n", status); + return 0; + } + + return 1; +} + +static void smp_setup_cpu(int cpu) +{ + if (xive_enabled()) + xive_smp_setup_cpu(); + else if (cpu != boot_cpuid) + xics_setup_cpu(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + vpa_init(cpu); + + cpumask_clear_cpu(cpu, of_spin_mask); +} + +static int smp_pSeries_kick_cpu(int nr) +{ + if (nr < 0 || nr >= nr_cpu_ids) + return -EINVAL; + + if (!smp_startup_cpu(nr)) + return -ENOENT; + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca_ptrs[nr]->cpu_start = 1; + + return 0; +} + +static int pseries_smp_prepare_cpu(int cpu) +{ + if (xive_enabled()) + return xive_smp_prepare_cpu(cpu); + return 0; +} + +/* Cause IPI as setup by the interrupt controller (xics or xive) */ +static void (*ic_cause_ipi)(int cpu) __ro_after_init; + +/* Use msgsndp doorbells target is a sibling, else use interrupt controller */ +static void dbell_or_ic_cause_ipi(int cpu) +{ + if (doorbell_try_core_ipi(cpu)) + return; + + ic_cause_ipi(cpu); +} + +static int pseries_cause_nmi_ipi(int cpu) +{ + int hwcpu; + + if (cpu == NMI_IPI_ALL_OTHERS) { + hwcpu = H_SIGNAL_SYS_RESET_ALL_OTHERS; + } else { + if (cpu < 0) { + WARN_ONCE(true, "incorrect cpu parameter %d", cpu); + return 0; + } + + hwcpu = get_hard_smp_processor_id(cpu); + } + + if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS) + return 1; + + return 0; +} + +static __init void pSeries_smp_probe(void) +{ + if (xive_enabled()) + xive_smp_probe(); + else + xics_smp_probe(); + + /* No doorbell facility, must use the interrupt controller for IPIs */ + if (!cpu_has_feature(CPU_FTR_DBELL)) + return; + + /* Doorbells can only be used for IPIs between SMT siblings */ + if (!cpu_has_feature(CPU_FTR_SMT)) + return; + + check_kvm_guest(); + + if (is_kvm_guest()) { + /* + * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp + * faults to the hypervisor which then reads the instruction + * from guest memory, which tends to be slower than using XIVE. + */ + if (xive_enabled()) + return; + + /* + * XICS hcalls aren't as fast, so we can use msgsndp (which + * also helps exercise KVM emulation), however KVM can't + * emulate secure guests because it can't read the instruction + * out of their memory. + */ + if (is_secure_guest()) + return; + } + + /* + * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are + * gang scheduled on the same physical core, so doorbells are always + * faster than the interrupt controller, and they can be used by + * secure guests. + */ + + ic_cause_ipi = smp_ops->cause_ipi; + smp_ops->cause_ipi = dbell_or_ic_cause_ipi; +} + +static struct smp_ops_t pseries_smp_ops = { + .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ + .cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */ + .cause_nmi_ipi = pseries_cause_nmi_ipi, + .probe = pSeries_smp_probe, + .prepare_cpu = pseries_smp_prepare_cpu, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_setup_cpu, + .cpu_bootable = smp_generic_cpu_bootable, +}; + +/* This is called very early */ +void __init smp_init_pseries(void) +{ + int i; + + pr_debug(" -> smp_init_pSeries()\n"); + smp_ops = &pseries_smp_ops; + + alloc_bootmem_cpumask_var(&of_spin_mask); + + /* + * Mark threads which are still spinning in hold loops + * + * We know prom_init will not have started them if RTAS supports + * query-cpu-stopped-state. + */ + if (rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE) == RTAS_UNKNOWN_SERVICE) { + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, of_spin_mask); + } + } else + cpumask_copy(of_spin_mask, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, of_spin_mask); + } + + pr_debug(" <- smp_init_pSeries()\n"); +} diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c new file mode 100644 index 0000000000..5c43435472 --- /dev/null +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2010 Brian King IBM Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct device suspend_dev; + +/** + * pseries_suspend_begin - First phase of hibernation + * + * Check to ensure we are in a valid state to hibernate + * + * Return value: + * 0 on success / other on failure + **/ +static int pseries_suspend_begin(u64 stream_id) +{ + long vasi_state, rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + /* Make sure the state is valid */ + rc = plpar_hcall(H_VASI_STATE, retbuf, stream_id); + + vasi_state = retbuf[0]; + + if (rc) { + pr_err("pseries_suspend_begin: vasi_state returned %ld\n",rc); + return rc; + } else if (vasi_state == H_VASI_ENABLED) { + return -EAGAIN; + } else if (vasi_state != H_VASI_SUSPENDING) { + pr_err("pseries_suspend_begin: vasi_state returned state %ld\n", + vasi_state); + return -EIO; + } + return 0; +} + +/** + * pseries_suspend_enter - Final phase of hibernation + * + * Return value: + * 0 on success / other on failure + **/ +static int pseries_suspend_enter(suspend_state_t state) +{ + return rtas_ibm_suspend_me(NULL); +} + +/** + * store_hibernate - Initiate partition hibernation + * @dev: subsys root device + * @attr: device attribute struct + * @buf: buffer + * @count: buffer size + * + * Write the stream ID received from the HMC to this file + * to trigger hibernating the partition + * + * Return value: + * number of bytes printed to buffer / other on failure + **/ +static ssize_t store_hibernate(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u64 stream_id; + int rc; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + stream_id = simple_strtoul(buf, NULL, 16); + + do { + rc = pseries_suspend_begin(stream_id); + if (rc == -EAGAIN) + ssleep(1); + } while (rc == -EAGAIN); + + if (!rc) + rc = pm_suspend(PM_SUSPEND_MEM); + + if (!rc) { + rc = count; + post_mobility_fixup(); + } + + + return rc; +} + +#define USER_DT_UPDATE 0 +#define KERN_DT_UPDATE 1 + +/** + * show_hibernate - Report device tree update responsibilty + * @dev: subsys root device + * @attr: device attribute struct + * @buf: buffer + * + * Report whether a device tree update is performed by the kernel after a + * resume, or if drmgr must coordinate the update from user space. + * + * Return value: + * 0 if drmgr is to initiate update, and 1 otherwise + **/ +static ssize_t show_hibernate(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", KERN_DT_UPDATE); +} + +static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate); + +static struct bus_type suspend_subsys = { + .name = "power", + .dev_name = "power", +}; + +static const struct platform_suspend_ops pseries_suspend_ops = { + .valid = suspend_valid_only_mem, + .enter = pseries_suspend_enter, +}; + +/** + * pseries_suspend_sysfs_register - Register with sysfs + * + * Return value: + * 0 on success / other on failure + **/ +static int pseries_suspend_sysfs_register(struct device *dev) +{ + struct device *dev_root; + int rc; + + if ((rc = subsys_system_register(&suspend_subsys, NULL))) + return rc; + + dev->id = 0; + dev->bus = &suspend_subsys; + + dev_root = bus_get_dev_root(&suspend_subsys); + if (dev_root) { + rc = device_create_file(dev_root, &dev_attr_hibernate); + put_device(dev_root); + if (rc) + goto subsys_unregister; + } + + return 0; + +subsys_unregister: + bus_unregister(&suspend_subsys); + return rc; +} + +/** + * pseries_suspend_init - initcall for pSeries suspend + * + * Return value: + * 0 on success / other on failure + **/ +static int __init pseries_suspend_init(void) +{ + int rc; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + if ((rc = pseries_suspend_sysfs_register(&suspend_dev))) + return rc; + + suspend_set_ops(&pseries_suspend_ops); + return 0; +} +machine_device_initcall(pseries, pseries_suspend_init); diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c new file mode 100644 index 0000000000..3b4045d508 --- /dev/null +++ b/arch/powerpc/platforms/pseries/svm.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Secure VM platform + * + * Copyright 2018 IBM Corporation + * Author: Anshuman Khandual + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int __init init_svm(void) +{ + if (!is_secure_guest()) + return 0; + + /* Don't release the SWIOTLB buffer. */ + ppc_swiotlb_enable = 1; + + /* + * Since the guest memory is inaccessible to the host, devices always + * need to use the SWIOTLB buffer for DMA even if dma_capable() says + * otherwise. + */ + ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE; + + /* Share the SWIOTLB buffer with the host. */ + swiotlb_update_mem_attributes(); + + return 0; +} +machine_early_initcall(pseries, init_svm); + +int set_memory_encrypted(unsigned long addr, int numpages) +{ + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return 0; + + if (!PAGE_ALIGNED(addr)) + return -EINVAL; + + uv_unshare_page(PHYS_PFN(__pa(addr)), numpages); + + return 0; +} + +int set_memory_decrypted(unsigned long addr, int numpages) +{ + if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT)) + return 0; + + if (!PAGE_ALIGNED(addr)) + return -EINVAL; + + uv_share_page(PHYS_PFN(__pa(addr)), numpages); + + return 0; +} + +/* There's one dispatch log per CPU. */ +#define NR_DTL_PAGE (DISPATCH_LOG_BYTES * CONFIG_NR_CPUS / PAGE_SIZE) + +static struct page *dtl_page_store[NR_DTL_PAGE]; +static long dtl_nr_pages; + +static bool is_dtl_page_shared(struct page *page) +{ + long i; + + for (i = 0; i < dtl_nr_pages; i++) + if (dtl_page_store[i] == page) + return true; + + return false; +} + +void dtl_cache_ctor(void *addr) +{ + unsigned long pfn = PHYS_PFN(__pa(addr)); + struct page *page = pfn_to_page(pfn); + + if (!is_dtl_page_shared(page)) { + dtl_page_store[dtl_nr_pages] = page; + dtl_nr_pages++; + WARN_ON(dtl_nr_pages >= NR_DTL_PAGE); + uv_share_page(pfn, 1); + } +} diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c new file mode 100644 index 0000000000..f9f682724e --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2022-23 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include + +#include "vas.h" + +#ifdef CONFIG_SYSFS +static struct kobject *pseries_vas_kobj; +static struct kobject *gzip_caps_kobj; + +struct vas_caps_entry { + struct kobject kobj; + struct vas_cop_feat_caps *caps; +}; + +#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj) + +/* + * This function is used to get the notification from the drmgr when + * QoS credits are changed. + */ +static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps, + const char *buf, size_t count) +{ + int err; + u16 creds; + + err = kstrtou16(buf, 0, &creds); + /* + * The user space interface from the management console + * notifies OS with the new QoS credits and then the + * hypervisor. So OS has to use this new credits value + * and reconfigure VAS windows (close or reopen depends + * on the credits available) instead of depending on VAS + * QoS capabilities from the hypervisor. + */ + if (!err) + err = vas_reconfig_capabilties(caps->win_type, creds); + + if (err) + return -EINVAL; + + pr_info("Set QoS total credits %u\n", creds); + + return count; +} + +#define sysfs_caps_entry_read(_name) \ +static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) \ +{ \ + return sprintf(buf, "%d\n", atomic_read(&caps->_name)); \ +} + +struct vas_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct vas_cop_feat_caps *, char *); + ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t); +}; + +#define VAS_ATTR_RO(_name) \ + sysfs_caps_entry_read(_name); \ + static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \ + 0444, _name##_show, NULL); + +/* + * Create sysfs interface: + * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities + * This directory contains the following VAS GZIP capabilities + * for the default credit type. + * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_total_credits + * Total number of default credits assigned to the LPAR which + * can be changed with DLPAR operation. + * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_used_credits + * Number of credits used by the user space. One credit will + * be assigned for each window open. + * + * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities + * This directory contains the following VAS GZIP capabilities + * for the Quality of Service (QoS) credit type. + * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_total_credits + * Total number of QoS credits assigned to the LPAR. The user + * has to define this value using HMC interface. It can be + * changed dynamically by the user. + * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_used_credits + * Number of credits used by the user space. + * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/update_total_credits + * Update total QoS credits dynamically + */ + +VAS_ATTR_RO(nr_total_credits); +VAS_ATTR_RO(nr_used_credits); + +static struct vas_sysfs_entry update_total_credits_attribute = + __ATTR(update_total_credits, 0200, NULL, update_total_credits_store); + +static struct attribute *vas_def_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + NULL, +}; +ATTRIBUTE_GROUPS(vas_def_capab); + +static struct attribute *vas_qos_capab_attrs[] = { + &nr_total_credits_attribute.attr, + &nr_used_credits_attribute.attr, + &update_total_credits_attribute.attr, + NULL, +}; +ATTRIBUTE_GROUPS(vas_qos_capab); + +static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + + if (!entry->show) + return -EIO; + + return entry->show(caps, buf); +} + +static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct vas_caps_entry *centry; + struct vas_cop_feat_caps *caps; + struct vas_sysfs_entry *entry; + + centry = to_caps_entry(kobj); + caps = centry->caps; + entry = container_of(attr, struct vas_sysfs_entry, attr); + if (!entry->store) + return -EIO; + + return entry->store(caps, buf, count); +} + +static void vas_type_release(struct kobject *kobj) +{ + struct vas_caps_entry *centry = to_caps_entry(kobj); + kfree(centry); +} + +static const struct sysfs_ops vas_sysfs_ops = { + .show = vas_type_show, + .store = vas_type_store, +}; + +static struct kobj_type vas_def_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_groups = vas_def_capab_groups, +}; + +static struct kobj_type vas_qos_attr_type = { + .release = vas_type_release, + .sysfs_ops = &vas_sysfs_ops, + .default_groups = vas_qos_capab_groups, +}; + +static char *vas_caps_kobj_name(struct vas_caps_entry *centry, + struct kobject **kobj) +{ + struct vas_cop_feat_caps *caps = centry->caps; + + if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) { + kobject_init(¢ry->kobj, &vas_qos_attr_type); + *kobj = gzip_caps_kobj; + return "qos_capabilities"; + } else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) { + kobject_init(¢ry->kobj, &vas_def_attr_type); + *kobj = gzip_caps_kobj; + return "default_capabilities"; + } else + return "Unknown"; +} + +/* + * Add feature specific capability dir entry. + * Ex: VDefGzip or VQosGzip + */ +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + struct vas_caps_entry *centry; + struct kobject *kobj = NULL; + int ret = 0; + char *name; + + centry = kzalloc(sizeof(*centry), GFP_KERNEL); + if (!centry) + return -ENOMEM; + + centry->caps = caps; + name = vas_caps_kobj_name(centry, &kobj); + + if (kobj) { + ret = kobject_add(¢ry->kobj, kobj, "%s", name); + + if (ret) { + pr_err("VAS: sysfs kobject add / event failed %d\n", + ret); + kobject_put(¢ry->kobj); + } + } + + return ret; +} + +static struct miscdevice vas_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "vas", +}; + +/* + * Add VAS and VasCaps (overall capabilities) dir entries. + */ +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + int ret; + + ret = misc_register(&vas_miscdev); + if (ret < 0) { + pr_err("%s: register vas misc device failed\n", __func__); + return ret; + } + + /* + * The hypervisor does not expose multiple VAS instances, but can + * see multiple VAS instances on PowerNV. So create 'vas0' directory + * on pseries. + */ + pseries_vas_kobj = kobject_create_and_add("vas0", + &vas_miscdev.this_device->kobj); + if (!pseries_vas_kobj) { + misc_deregister(&vas_miscdev); + pr_err("Failed to create VAS sysfs entry\n"); + return -ENOMEM; + } + + if ((vas_caps->feat_type & VAS_GZIP_QOS_FEAT_BIT) || + (vas_caps->feat_type & VAS_GZIP_DEF_FEAT_BIT)) { + gzip_caps_kobj = kobject_create_and_add("gzip", + pseries_vas_kobj); + if (!gzip_caps_kobj) { + pr_err("Failed to create VAS GZIP capability entry\n"); + kobject_put(pseries_vas_kobj); + misc_deregister(&vas_miscdev); + return -ENOMEM; + } + } + + return 0; +} + +#else +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps) +{ + return 0; +} + +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps) +{ + return 0; +} +#endif diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 0000000000..71d52a670d --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,1121 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static struct vas_all_caps caps_all; +static bool copypaste_feat; +static struct hv_vas_cop_feat_caps hv_cop_caps; + +static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); +static bool migration_in_progress; + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, win->pid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n", + rc, win->vas_win.winid, win->pid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES + * @query_type: If 0 is passed, the hypervisor returns the overall + * capabilities which provides all feature(s) that are + * available. Then query the hypervisor to get the + * corresponding capabilities for the specific feature. + * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS + * and VAS GZIP Default capabilities. + * H_QUERY_NX_CAPABILITIES provides NX GZIP + * capabilities. + * @result: Return buffer to save capabilities. + */ +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) +{ + long rc; + + rc = plpar_hcall_norets(hcall, query_type, result); + + if (rc == H_SUCCESS) + return 0; + + /* H_FUNCTION means HV does not support VAS so don't print an error */ + if (rc != H_FUNCTION) { + pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", + (hcall == H_QUERY_VAS_CAPABILITIES) ? + "H_QUERY_VAS_CAPABILITIES" : + "H_QUERY_NX_CAPABILITIES", + rc, query_type, result); + } + + return -EIO; +} +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + while (atomic_read(&txwin->pending_faults)) { + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + atomic_dec(&txwin->pending_faults); + } + + return IRQ_HANDLED; +} + +/* + * irq_default_primary_handler() can be used only with IRQF_ONESHOT + * which disables IRQ before executing the thread handler and enables + * it after. But this disabling interrupt sets the VAS IRQ OFF + * state in the hypervisor. If the NX generates fault interrupt + * during this window, the hypervisor will not deliver this + * interrupt to the LPAR. So use VAS specific IRQ handler instead + * of calling the default primary handler. + */ +static irqreturn_t pseries_vas_irq_handler(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + + /* + * The thread hanlder will process this interrupt if it is + * already running. + */ + atomic_inc(&txwin->pending_faults); + + return IRQ_WAKE_THREAD; +} + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, + pseries_vas_irq_handler, + pseries_vas_fault_thread_fn, 0, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. + * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->nr_used_credits) > + atomic_read(&cop_feat_caps->nr_total_credits)) { + pr_err_ratelimited("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, hard_smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + txwin->pid = mfspr(SPRN_PID); + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + if (migration_in_progress) { + rc = -EBUSY; + } else { + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. + */ + rc = h_modify_vas_window(txwin); + if (!rc) + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto out_free; + + txwin->win_type = cop_feat_caps->win_type; + + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So possible + * that migration_in_progress is set before modify HCALL which + * may cause some windows are still open when the hypervisor + * initiates the migration. + * So checks the migration_in_progress flag again and close all + * open windows. + * + * Possible to lose the acquired credit with DLPAR core + * removal after the window is opened. So if there are any + * closed windows (means with lost credits), do not give new + * window to user space. New windows will be opened only + * after the existing windows are reopened when credits are + * available. + */ + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { + list_add(&txwin->win_list, &caps->list); + caps->nr_open_windows++; + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + return &txwin->vas_win; + } + mutex_unlock(&vas_pseries_mutex); + + put_vas_user_win_ref(&txwin->vas_win.task_ref); + rc = -EBUSY; + pr_err_ratelimited("No credit is available to allocate window\n"); + +out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. + */ + free_irq_setup(txwin); + h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); +out: + atomic_dec(&cop_feat_caps->nr_used_credits); + kfree(txwin); + return ERR_PTR(rc); +} + +static u64 vas_paste_address(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + return win->win_addr; +} + +static int deallocate_free_window(struct pseries_vas_window *win) +{ + int rc = 0; + + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ + rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); + + return rc; +} + +static int vas_deallocate_window(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + struct vas_cop_feat_caps *caps; + int rc = 0; + + if (!vwin) + return -EINVAL; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + + /* Should not happen */ + if (win->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Window (%u): Invalid window type %u\n", + vwin->winid, win->win_type); + return -EINVAL; + } + + caps = &vascaps[win->win_type].caps; + mutex_lock(&vas_pseries_mutex); + /* + * VAS window is already closed in the hypervisor when + * lost the credit or with migration. So just remove the entry + * from the list, remove task references and free vas_window + * struct. + */ + if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + } else + vascaps[win->win_type].nr_close_wins--; + + list_del(&win->win_list); + atomic_dec(&caps->nr_used_credits); + vascaps[win->win_type].nr_open_windows--; + mutex_unlock(&vas_pseries_mutex); + + mm_context_remove_vas_window(vwin->task_ref.mm); + put_vas_user_win_ref(&vwin->task_ref); + + kfree(win); + return 0; +} + +static const struct vas_user_win_ops vops_pseries = { + .open_win = vas_allocate_window, /* Open and configure window */ + .paste_addr = vas_paste_address, /* To do copy/paste */ + .close_win = vas_deallocate_window, /* Close window */ +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + if (!copypaste_feat) + return -ENOTSUPP; + + return vas_register_coproc_api(mod, cop_type, name, &vops_pseries); +} +EXPORT_SYMBOL_GPL(vas_register_api_pseries); + +void vas_unregister_api_pseries(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); + +/* + * Get the specific capabilities based on the feature type. + * Right now supports GZIP default and GZIP QoS capabilities. + */ +static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, + struct hv_vas_cop_feat_caps *hv_caps) +{ + struct vas_cop_feat_caps *caps; + struct vas_caps *vcaps; + int rc = 0; + + vcaps = &vascaps[type]; + memset(vcaps, 0, sizeof(*vcaps)); + INIT_LIST_HEAD(&vcaps->list); + + vcaps->feat = feat; + caps = &vcaps->caps; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + return rc; + + caps->user_mode = hv_caps->user_mode; + if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { + pr_err("User space COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + + caps->descriptor = be64_to_cpu(hv_caps->descriptor); + caps->win_type = hv_caps->win_type; + if (caps->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Unsupported window type %u\n", caps->win_type); + return -EINVAL; + } + caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); + caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); + atomic_set(&caps->nr_total_credits, + be16_to_cpu(hv_caps->target_lpar_creds)); + if (feat == VAS_GZIP_DEF_FEAT) { + caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); + + if (caps->max_win_creds < DEF_WIN_CREDS) { + pr_err("Window creds(%u) > max allowed window creds(%u)\n", + DEF_WIN_CREDS, caps->max_win_creds); + return -EINVAL; + } + } + + rc = sysfs_add_vas_caps(caps); + if (rc) + return rc; + + copypaste_feat = true; + + return 0; +} + +/* + * VAS windows can be closed due to lost credits when the core is + * removed. So reopen them if credits are available due to DLPAR + * core add and set the window active status. When NX sees the page + * fault on the unmapped paste address, the kernel handles the fault + * by setting the remapping to new paste address if the window is + * active. + */ +static int reconfig_open_windows(struct vas_caps *vcaps, int creds, + bool migrate) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *caps = &vcaps->caps; + struct pseries_vas_window *win = NULL, *tmp; + int rc, mv_ents = 0; + int flag; + + /* + * Nothing to do if there are no closed windows. + */ + if (!vcaps->nr_close_wins) + return 0; + + /* + * For the core removal, the hypervisor reduces the credits + * assigned to the LPAR and the kernel closes VAS windows + * in the hypervisor depends on reduced credits. The kernel + * uses LIFO (the last windows that are opened will be closed + * first) and expects to open in the same order when credits + * are available. + * For example, 40 windows are closed when the LPAR lost 2 cores + * (dedicated). If 1 core is added, this LPAR can have 20 more + * credits. It means the kernel can reopen 20 windows. So move + * 20 entries in the VAS windows lost and reopen next 20 windows. + * For partition migration, reopen all windows that are closed + * during resume. + */ + if ((vcaps->nr_close_wins > creds) && !migrate) + mv_ents = vcaps->nr_close_wins - creds; + + list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) { + if (!mv_ents) + break; + + mv_ents--; + } + + /* + * Open windows if they are closed only with migration or + * DLPAR (lost credit) before. + */ + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + + list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) { + /* + * This window is closed with DLPAR and migration events. + * So reopen the window with the last event. + * The user space is not suspended with the current + * migration notifier. So the user space can issue DLPAR + * CPU hotplug while migration in progress. In this case + * this window will be opened with the last event. + */ + if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) && + (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) { + win->vas_win.status &= ~flag; + continue; + } + + /* + * Nothing to do on this window if it is not closed + * with this flag + */ + if (!(win->vas_win.status & flag)) + continue; + + rc = allocate_setup_window(win, (u64 *)&domain[0], + caps->win_type); + if (rc) + return rc; + + rc = h_modify_vas_window(win); + if (rc) + goto out; + + mutex_lock(&win->vas_win.task_ref.mmap_mutex); + /* + * Set window status to active + */ + win->vas_win.status &= ~flag; + mutex_unlock(&win->vas_win.task_ref.mmap_mutex); + win->win_type = caps->win_type; + if (!--vcaps->nr_close_wins) + break; + } + + return 0; +out: + /* + * Window modify HCALL failed. So close the window to the + * hypervisor and return. + */ + free_irq_setup(win); + h_deallocate_vas_window(win->vas_win.winid); + return rc; +} + +/* + * The hypervisor reduces the available credits if the LPAR lost core. It + * means the excessive windows should not be active and the user space + * should not be using these windows to send compression requests to NX. + * So the kernel closes the excessive windows and unmap the paste address + * such that the user space receives paste instruction failure. Then up to + * the user space to fall back to SW compression and manage with the + * existing windows. + */ +static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds, + bool migrate) +{ + struct pseries_vas_window *win, *tmp; + struct vas_user_win_ref *task_ref; + struct vm_area_struct *vma; + int rc = 0, flag; + + if (migrate) + flag = VAS_WIN_MIGRATE_CLOSE; + else + flag = VAS_WIN_NO_CRED_CLOSE; + + list_for_each_entry_safe(win, tmp, &vcap->list, win_list) { + /* + * This window is already closed due to lost credit + * or for migration before. Go for next window. + * For migration, nothing to do since this window + * closed for DLPAR and will be reopened even on + * the destination system with other DLPAR operation. + */ + if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) || + (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) { + win->vas_win.status |= flag; + continue; + } + + task_ref = &win->vas_win.task_ref; + /* + * VAS mmap (coproc_mmap()) and its fault handler + * (vas_mmap_fault()) are called after holding mmap lock. + * So hold mmap mutex after mmap_lock to avoid deadlock. + */ + mmap_write_lock(task_ref->mm); + mutex_lock(&task_ref->mmap_mutex); + vma = task_ref->vma; + /* + * Number of available credits are reduced, So select + * and close windows. + */ + win->vas_win.status |= flag; + + /* + * vma is set in the original mapping. But this mapping + * is done with mmap() after the window is opened with ioctl. + * so we may not see the original mapping if the core remove + * is done before the original mmap() and after the ioctl. + */ + if (vma) + zap_vma_pages(vma); + + mutex_unlock(&task_ref->mmap_mutex); + mmap_write_unlock(task_ref->mm); + /* + * Close VAS window in the hypervisor, but do not + * free vas_window struct since it may be reused + * when the credit is available later (DLPAR with + * adding cores). This struct will be used + * later when the process issued with close(FD). + */ + rc = deallocate_free_window(win); + /* + * This failure is from the hypervisor. + * No way to stop migration for these failures. + * So ignore error and continue closing other windows. + */ + if (rc && !migrate) + return rc; + + vcap->nr_close_wins++; + + /* + * For migration, do not depend on lpar_creds in case if + * mismatch with the hypervisor value (should not happen). + * So close all active windows in the list and will be + * reopened windows based on the new lpar_creds on the + * destination system during resume. + */ + if (!migrate && !--excess_creds) + break; + } + + return 0; +} + +/* + * Get new VAS capabilities when the core add/removal configuration + * changes. Reconfig window configurations based on the credits + * availability from this new capabilities. + */ +int vas_reconfig_capabilties(u8 type, int new_nr_creds) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds; + struct vas_caps *vcaps; + int rc = 0, nr_active_wins; + + if (type >= VAS_MAX_FEAT_TYPE) { + pr_err("Invalid credit type %d\n", type); + return -EINVAL; + } + + vcaps = &vascaps[type]; + caps = &vcaps->caps; + + mutex_lock(&vas_pseries_mutex); + + old_nr_creds = atomic_read(&caps->nr_total_credits); + + atomic_set(&caps->nr_total_credits, new_nr_creds); + /* + * The total number of available credits may be decreased or + * increased with DLPAR operation. Means some windows have to be + * closed / reopened. Hold the vas_pseries_mutex so that the + * user space can not open new windows. + */ + if (old_nr_creds < new_nr_creds) { + /* + * If the existing target credits is less than the new + * target, reopen windows if they are closed due to + * the previous DLPAR (core removal). + */ + rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds, + false); + } else { + /* + * # active windows is more than new LPAR available + * credits. So close the excessive windows. + * On pseries, each window will have 1 credit. + */ + nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins; + if (nr_active_wins > new_nr_creds) + rc = reconfig_close_windows(vcaps, + nr_active_wins - new_nr_creds, + false); + } + + mutex_unlock(&vas_pseries_mutex); + return rc; +} + +int pseries_vas_dlpar_cpu(void) +{ + int new_nr_creds, rc; + + /* + * NX-GZIP is not enabled. Nothing to do for DLPAR event + */ + if (!copypaste_feat) + return 0; + + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds); + } + + if (rc) + pr_err("Failed reconfig VAS capabilities with DLPAR\n"); + + return rc; +} + +/* + * Total number of default credits available (target_credits) + * in LPAR depends on number of cores configured. It varies based on + * whether processors are in shared mode or dedicated mode. + * Get the notifier when CPU configuration is changed with DLPAR + * operation so that get the new target_credits (vas default capabilities) + * and then update the existing windows usage if needed. + */ +static int pseries_vas_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct of_reconfig_data *rd = data; + struct device_node *dn = rd->dn; + const __be32 *intserv = NULL; + int len; + + /* + * For shared CPU partition, the hypervisor assigns total credits + * based on entitled core capacity. So updating VAS windows will + * be called from lparcfg_write(). + */ + if (is_shared_processor()) + return NOTIFY_OK; + + if ((action == OF_RECONFIG_ATTACH_NODE) || + (action == OF_RECONFIG_DETACH_NODE)) + intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + /* + * Processor config is not changed + */ + if (!intserv) + return NOTIFY_OK; + + return pseries_vas_dlpar_cpu(); +} + +static struct notifier_block pseries_vas_nb = { + .notifier_call = pseries_vas_notifier, +}; + +/* + * For LPM, all windows have to be closed on the source partition + * before migration and reopen them on the destination partition + * after migration. So closing windows during suspend and + * reopen them during resume. + */ +int vas_migration_handler(int action) +{ + struct vas_cop_feat_caps *caps; + int old_nr_creds, new_nr_creds = 0; + struct vas_caps *vcaps; + int i, rc = 0; + + pr_info("VAS migration event %d\n", action); + + /* + * NX-GZIP is not enabled. Nothing to do for migration. + */ + if (!copypaste_feat) + return rc; + + if (action == VAS_SUSPEND) + migration_in_progress = true; + else + migration_in_progress = false; + + for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) { + vcaps = &vascaps[i]; + caps = &vcaps->caps; + old_nr_creds = atomic_read(&caps->nr_total_credits); + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, + vcaps->feat, + (u64)virt_to_phys(&hv_cop_caps)); + if (!rc) { + new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds); + /* + * Should not happen. But incase print messages, close + * all windows in the list during suspend and reopen + * windows based on new lpar_creds on the destination + * system. + */ + if (old_nr_creds != new_nr_creds) { + pr_err("Target credits mismatch with the hypervisor\n"); + pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n", + action, old_nr_creds, new_nr_creds); + pr_err("Used creds: %d, Active creds: %d\n", + atomic_read(&caps->nr_used_credits), + vcaps->nr_open_windows - vcaps->nr_close_wins); + } + } else { + pr_err("state(%d): Get VAS capabilities failed with %d\n", + action, rc); + /* + * We can not stop migration with the current lpm + * implementation. So continue closing all windows in + * the list (during suspend) and return without + * opening windows (during resume) if VAS capabilities + * HCALL failed. + */ + if (action == VAS_RESUME) + goto out; + } + + switch (action) { + case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); + rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, + true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. + */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); + break; + case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); + atomic_set(&caps->nr_total_credits, new_nr_creds); + rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); + break; + default: + /* should not happen */ + pr_err("Invalid migration action %d\n", action); + rc = -EINVAL; + goto out; + } + + /* + * Ignore errors during suspend and return for resume. + */ + if (rc && (action == VAS_RESUME)) + goto out; + } + + pr_info("VAS migration event (%d) successful\n", action); + +out: + return rc; +} + +static int __init pseries_vas_init(void) +{ + struct hv_vas_all_caps *hv_caps; + int rc = 0; + + /* + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { + copypaste_feat = false; + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + /* + * Get VAS overall capabilities by passing 0 to feature type. + */ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); + caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + + sysfs_pseries_vas_init(&caps_all); + + /* + * QOS capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, + VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps); + + if (rc) + goto out; + } + /* + * Default capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) + rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, + VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps); + + if (!rc && copypaste_feat) { + if (firmware_has_feature(FW_FEATURE_LPAR)) + of_reconfig_notifier_register(&pseries_vas_nb); + + pr_info("GZIP feature is available\n"); + } else { + /* + * Should not happen, but only when get default + * capabilities HCALL failed. So disable copy paste + * feature. + */ + copypaste_feat = false; + } + +out: + kfree(hv_caps); + return rc; +} +machine_device_initcall(pseries, pseries_vas_init); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h new file mode 100644 index 0000000000..45567cd131 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2020-21 IBM Corp. + */ + +#ifndef _VAS_H +#define _VAS_H +#include +#include +#include + +/* + * VAS window modify flags + */ +#define VAS_MOD_WIN_CLOSE PPC_BIT(0) +#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1) +#define VAS_MOD_WIN_DR PPC_BIT(3) +#define VAS_MOD_WIN_PR PPC_BIT(4) +#define VAS_MOD_WIN_SF PPC_BIT(5) +#define VAS_MOD_WIN_TA PPC_BIT(6) +#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \ + VAS_MOD_WIN_PR | VAS_MOD_WIN_SF) + +#define VAS_WIN_ACTIVE 0x0 +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +/* Process of being modified, deallocated, or quiesced */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 + +#define VAS_COPY_PASTE_USER_MODE 0x00000001 +#define VAS_COP_OP_USER_MODE 0x00000010 + +#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970 +#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970 + +enum vas_migrate_action { + VAS_SUSPEND, + VAS_RESUME, +}; + +/* + * Co-processor feature - GZIP QoS windows or GZIP default windows + */ +enum vas_cop_feat_type { + VAS_GZIP_QOS_FEAT_TYPE, + VAS_GZIP_DEF_FEAT_TYPE, + VAS_MAX_FEAT_TYPE, +}; + +/* + * Use to get feature specific capabilities from the + * hypervisor. + */ +struct hv_vas_cop_feat_caps { + __be64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; + __be16 max_lpar_creds; + __be16 max_win_creds; + union { + __be16 reserved; + __be16 def_lpar_creds; /* Used for default capabilities */ + }; + __be16 target_lpar_creds; +} __packed __aligned(0x1000); + +/* + * Feature specific (QoS or default) capabilities. + */ +struct vas_cop_feat_caps { + u64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; /* User mode copy/paste or COP HCALL */ + u16 max_lpar_creds; /* Max credits available in LPAR */ + /* Max credits can be assigned per window */ + u16 max_win_creds; + union { + u16 reserved; /* Used for QoS credit type */ + u16 def_lpar_creds; /* Used for default credit type */ + }; + /* Total LPAR available credits. Can be different from max LPAR */ + /* credits due to DLPAR operation */ + atomic_t nr_total_credits; /* Total credits assigned to LPAR */ + atomic_t nr_used_credits; /* Used credits so far */ +}; + +/* + * Feature (QoS or Default) specific to store capabilities and + * the list of open windows. + */ +struct vas_caps { + struct vas_cop_feat_caps caps; + struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ + int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ + int nr_open_windows; /* Number of successful open windows */ + u8 feat; /* Feature type */ +}; + +/* + * To get window information from the hypervisor. + */ +struct hv_vas_win_lpar { + __be16 version; + u8 win_type; + u8 status; + __be16 credits; /* No of credits assigned to this window */ + __be16 reserved; + __be32 pid; /* LPAR Process ID */ + __be32 tid; /* LPAR Thread ID */ + __be64 win_addr; /* Paste address */ + __be32 interrupt; /* Interrupt when NX request completes */ + __be32 fault; /* Interrupt when NX sees fault */ + /* Associativity Domain Identifiers as returned in */ + /* H_HOME_NODE_ASSOCIATIVITY */ + __be64 domain[6]; + __be64 win_util; /* Number of bytes processed */ +} __packed __aligned(0x1000); + +struct pseries_vas_window { + struct vas_window vas_win; + u64 win_addr; /* Physical paste address */ + u8 win_type; /* QoS or Default window */ + u32 complete_irq; /* Completion interrupt */ + u32 fault_irq; /* Fault interrupt */ + u64 domain[6]; /* Associativity domain Ids */ + /* this window is allocated */ + u64 util; + u32 pid; /* PID associated with this window */ + + /* List of windows opened which is used for LPM */ + struct list_head win_list; + u64 flags; + char *name; + int fault_virq; + atomic_t pending_faults; /* Number of pending faults */ +}; + +int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps); +int vas_reconfig_capabilties(u8 type, int new_nr_creds); +int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps); + +#ifdef CONFIG_PPC_VAS +int vas_migration_handler(int action); +int pseries_vas_dlpar_cpu(void); +#else +static inline int vas_migration_handler(int action) +{ + return 0; +} +static inline int pseries_vas_dlpar_cpu(void) +{ + return 0; +} +#endif +#endif /* _VAS_H */ diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c new file mode 100644 index 0000000000..2dc9cbc4bc --- /dev/null +++ b/arch/powerpc/platforms/pseries/vio.c @@ -0,0 +1,1729 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * IBM PowerPC Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003,2008 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard + * Stephen Rothwell + * Robert Jennings + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct vio_dev vio_bus_device = { /* fake "parent" device */ + .name = "vio", + .type = "", + .dev.init_name = "vio", + .dev.bus = &vio_bus_type, +}; + +#ifdef CONFIG_PPC_SMLPAR +/** + * vio_cmo_pool - A pool of IO memory for CMO use + * + * @size: The size of the pool in bytes + * @free: The amount of free memory in the pool + */ +struct vio_cmo_pool { + size_t size; + size_t free; +}; + +/* How many ms to delay queued balance work */ +#define VIO_CMO_BALANCE_DELAY 100 + +/* Portion out IO memory to CMO devices by this chunk size */ +#define VIO_CMO_BALANCE_CHUNK 131072 + +/** + * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement + * + * @vio_dev: struct vio_dev pointer + * @list: pointer to other devices on bus that are being tracked + */ +struct vio_cmo_dev_entry { + struct vio_dev *viodev; + struct list_head list; +}; + +/** + * vio_cmo - VIO bus accounting structure for CMO entitlement + * + * @lock: spinlock for entire structure + * @balance_q: work queue for balancing system entitlement + * @device_list: list of CMO-enabled devices requiring entitlement + * @entitled: total system entitlement in bytes + * @reserve: pool of memory from which devices reserve entitlement, incl. spare + * @excess: pool of excess entitlement not needed for device reserves or spare + * @spare: IO memory for device hotplug functionality + * @min: minimum necessary for system operation + * @desired: desired memory for system operation + * @curr: bytes currently allocated + * @high: high water mark for IO data usage + */ +static struct vio_cmo { + spinlock_t lock; + struct delayed_work balance_q; + struct list_head device_list; + size_t entitled; + struct vio_cmo_pool reserve; + struct vio_cmo_pool excess; + size_t spare; + size_t min; + size_t desired; + size_t curr; + size_t high; +} vio_cmo; + +/** + * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows + */ +static int vio_cmo_num_OF_devs(void) +{ + struct device_node *node_vroot; + int count = 0; + + /* + * Count the number of vdevice entries with an + * ibm,my-dma-window OF property + */ + node_vroot = of_find_node_by_name(NULL, "vdevice"); + if (node_vroot) { + struct device_node *of_node; + struct property *prop; + + for_each_child_of_node(node_vroot, of_node) { + prop = of_find_property(of_node, "ibm,my-dma-window", + NULL); + if (prop) + count++; + } + } + of_node_put(node_vroot); + return count; +} + +/** + * vio_cmo_alloc - allocate IO memory for CMO-enable devices + * + * @viodev: VIO device requesting IO memory + * @size: size of allocation requested + * + * Allocations come from memory reserved for the devices and any excess + * IO memory available to all devices. The spare pool used to service + * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be + * made available. + * + * Return codes: + * 0 for successful allocation and -ENOMEM for a failure + */ +static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t reserve_free = 0; + size_t excess_free = 0; + int ret = -ENOMEM; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Determine the amount of free entitlement available in reserve */ + if (viodev->cmo.entitled > viodev->cmo.allocated) + reserve_free = viodev->cmo.entitled - viodev->cmo.allocated; + + /* If spare is not fulfilled, the excess pool can not be used. */ + if (vio_cmo.spare >= VIO_CMO_MIN_ENT) + excess_free = vio_cmo.excess.free; + + /* The request can be satisfied */ + if ((reserve_free + excess_free) >= size) { + vio_cmo.curr += size; + if (vio_cmo.curr > vio_cmo.high) + vio_cmo.high = vio_cmo.curr; + viodev->cmo.allocated += size; + size -= min(reserve_free, size); + vio_cmo.excess.free -= size; + ret = 0; + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return ret; +} + +/** + * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices + * @viodev: VIO device freeing IO memory + * @size: size of deallocation + * + * IO memory is freed by the device back to the correct memory pools. + * The spare pool is replenished first from either memory pool, then + * the reserve pool is used to reduce device entitlement, the excess + * pool is used to increase the reserve pool toward the desired entitlement + * target, and then the remaining memory is returned to the pools. + * + */ +static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) +{ + unsigned long flags; + size_t spare_needed = 0; + size_t excess_freed = 0; + size_t reserve_freed = size; + size_t tmp; + int balance = 0; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.curr -= size; + + /* Amount of memory freed from the excess pool */ + if (viodev->cmo.allocated > viodev->cmo.entitled) { + excess_freed = min(reserve_freed, (viodev->cmo.allocated - + viodev->cmo.entitled)); + reserve_freed -= excess_freed; + } + + /* Remove allocation from device */ + viodev->cmo.allocated -= (reserve_freed + excess_freed); + + /* Spare is a subset of the reserve pool, replenish it first. */ + spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare; + + /* + * Replenish the spare in the reserve pool from the excess pool. + * This moves entitlement into the reserve pool. + */ + if (spare_needed && excess_freed) { + tmp = min(excess_freed, spare_needed); + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + vio_cmo.spare += tmp; + excess_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Replenish the spare in the reserve pool from the reserve pool. + * This removes entitlement from the device down to VIO_CMO_MIN_ENT, + * if needed, and gives it to the spare pool. The amount of used + * memory in this pool does not change. + */ + if (spare_needed && reserve_freed) { + tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT)); + + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + reserve_freed -= tmp; + spare_needed -= tmp; + balance = 1; + } + + /* + * Increase the reserve pool until the desired allocation is met. + * Move an allocation freed from the excess pool into the reserve + * pool and schedule a balance operation. + */ + if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) { + tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size)); + + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + excess_freed -= tmp; + balance = 1; + } + + /* Return memory from the excess pool to that pool */ + if (excess_freed) + vio_cmo.excess.free += excess_freed; + + if (balance) + schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_entitlement_update - Manage system entitlement changes + * + * @new_entitlement: new system entitlement to attempt to accommodate + * + * Increases in entitlement will be used to fulfill the spare entitlement + * and the rest is given to the excess pool. Decreases, if they are + * possible, come from the excess pool and from unused device entitlement + * + * Returns: 0 on success, -ENOMEM when change can not be made + */ +int vio_cmo_entitlement_update(size_t new_entitlement) +{ + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail, delta, tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Entitlement increases */ + if (new_entitlement > vio_cmo.entitled) { + delta = new_entitlement - vio_cmo.entitled; + + /* Fulfill spare allocation */ + if (vio_cmo.spare < VIO_CMO_MIN_ENT) { + tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare)); + vio_cmo.spare += tmp; + vio_cmo.reserve.size += tmp; + delta -= tmp; + } + + /* Remaining new allocation goes to the excess pool */ + vio_cmo.entitled += delta; + vio_cmo.excess.size += delta; + vio_cmo.excess.free += delta; + + goto out; + } + + /* Entitlement decreases */ + delta = vio_cmo.entitled - new_entitlement; + avail = vio_cmo.excess.free; + + /* + * Need to check how much unused entitlement each device can + * sacrifice to fulfill entitlement change. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (avail >= delta) + break; + + viodev = dev_ent->viodev; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + avail += viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + } + + if (delta <= avail) { + vio_cmo.entitled -= delta; + + /* Take entitlement from the excess pool first */ + tmp = min(vio_cmo.excess.free, delta); + vio_cmo.excess.size -= tmp; + vio_cmo.excess.free -= tmp; + delta -= tmp; + + /* + * Remove all but VIO_CMO_MIN_ENT bytes from devices + * until entitlement change is served + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + if (!delta) + break; + + viodev = dev_ent->viodev; + tmp = 0; + if ((viodev->cmo.entitled > viodev->cmo.allocated) && + (viodev->cmo.entitled > VIO_CMO_MIN_ENT)) + tmp = viodev->cmo.entitled - + max_t(size_t, viodev->cmo.allocated, + VIO_CMO_MIN_ENT); + viodev->cmo.entitled -= min(tmp, delta); + delta -= min(tmp, delta); + } + } else { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + +out: + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_balance - Balance entitlement among devices + * + * @work: work queue structure for this operation + * + * Any system entitlement above the minimum needed for devices, or + * already allocated to devices, can be distributed to the devices. + * The list of devices is iterated through to recalculate the desired + * entitlement level and to determine how much entitlement above the + * minimum entitlement is allocated to devices. + * + * Small chunks of the available entitlement are given to devices until + * their requirements are fulfilled or there is no entitlement left to give. + * Upon completion sizes of the reserve and excess pools are calculated. + * + * The system minimum entitlement level is also recalculated here. + * Entitlement will be reserved for devices even after vio_bus_remove to + * accommodate reloading the driver. The OF tree is walked to count the + * number of devices present and this will remove entitlement for devices + * that have actually left the system after having vio_bus_remove called. + */ +static void vio_cmo_balance(struct work_struct *work) +{ + struct vio_cmo *cmo; + struct vio_dev *viodev; + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t avail = 0, level, chunk, need; + int devcount = 0, fulfilled; + + cmo = container_of(work, struct vio_cmo, balance_q.work); + + spin_lock_irqsave(&vio_cmo.lock, flags); + + /* Calculate minimum entitlement and fulfill spare */ + cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT; + BUG_ON(cmo->min > cmo->entitled); + cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min)); + cmo->min += cmo->spare; + cmo->desired = cmo->min; + + /* + * Determine how much entitlement is available and reset device + * entitlements + */ + avail = cmo->entitled - cmo->spare; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + devcount++; + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT); + avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT); + } + + /* + * Having provided each device with the minimum entitlement, loop + * over the devices portioning out the remaining entitlement + * until there is nothing left. + */ + level = VIO_CMO_MIN_ENT; + while (avail) { + fulfilled = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + + if (viodev->cmo.desired <= level) { + fulfilled++; + continue; + } + + /* + * Give the device up to VIO_CMO_BALANCE_CHUNK + * bytes of entitlement, but do not exceed the + * desired level of entitlement for the device. + */ + chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK); + chunk = min(chunk, (viodev->cmo.desired - + viodev->cmo.entitled)); + viodev->cmo.entitled += chunk; + + /* + * If the memory for this entitlement increase was + * already allocated to the device it does not come + * from the available pool being portioned out. + */ + need = max(viodev->cmo.allocated, viodev->cmo.entitled)- + max(viodev->cmo.allocated, level); + avail -= need; + + } + if (fulfilled == devcount) + break; + level += VIO_CMO_BALANCE_CHUNK; + } + + /* Calculate new reserve and excess pool sizes */ + cmo->reserve.size = cmo->min; + cmo->excess.free = 0; + cmo->excess.size = 0; + need = 0; + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) { + viodev = dev_ent->viodev; + /* Calculated reserve size above the minimum entitlement */ + if (viodev->cmo.entitled) + cmo->reserve.size += (viodev->cmo.entitled - + VIO_CMO_MIN_ENT); + /* Calculated used excess entitlement */ + if (viodev->cmo.allocated > viodev->cmo.entitled) + need += viodev->cmo.allocated - viodev->cmo.entitled; + } + cmo->excess.size = cmo->entitled - cmo->reserve.size; + cmo->excess.free = cmo->excess.size - need; + + cancel_delayed_work(to_delayed_work(work)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + unsigned long attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + void *ret; + + if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) { + atomic_inc(&viodev->cmo.allocs_failed); + return NULL; + } + + ret = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, + dma_handle, dev->coherent_dma_mask, flag, + dev_to_node(dev)); + if (unlikely(ret == NULL)) { + vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); + atomic_inc(&viodev->cmo.allocs_failed); + } + + return ret; +} + +static void vio_dma_iommu_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + unsigned long attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + + iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle); + vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE)); +} + +static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction, + unsigned long attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl = get_iommu_table_base(dev); + dma_addr_t ret = DMA_MAPPING_ERROR; + + if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) + goto out_fail; + ret = iommu_map_page(dev, tbl, page, offset, size, dma_get_mask(dev), + direction, attrs); + if (unlikely(ret == DMA_MAPPING_ERROR)) + goto out_deallocate; + return ret; + +out_deallocate: + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); +out_fail: + atomic_inc(&viodev->cmo.allocs_failed); + return DMA_MAPPING_ERROR; +} + +static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, + enum dma_data_direction direction, + unsigned long attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl = get_iommu_table_base(dev); + + iommu_unmap_page(tbl, dma_handle, size, direction, attrs); + vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl))); +} + +static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, + int nelems, enum dma_data_direction direction, + unsigned long attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl = get_iommu_table_base(dev); + struct scatterlist *sgl; + int ret, count; + size_t alloc_size = 0; + + for_each_sg(sglist, sgl, nelems, count) + alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl)); + + ret = vio_cmo_alloc(viodev, alloc_size); + if (ret) + goto out_fail; + ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev), + direction, attrs); + if (unlikely(!ret)) + goto out_deallocate; + + for_each_sg(sglist, sgl, ret, count) + alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); + if (alloc_size) + vio_cmo_dealloc(viodev, alloc_size); + return ret; + +out_deallocate: + vio_cmo_dealloc(viodev, alloc_size); +out_fail: + atomic_inc(&viodev->cmo.allocs_failed); + return ret; +} + +static void vio_dma_iommu_unmap_sg(struct device *dev, + struct scatterlist *sglist, int nelems, + enum dma_data_direction direction, + unsigned long attrs) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct iommu_table *tbl = get_iommu_table_base(dev); + struct scatterlist *sgl; + size_t alloc_size = 0; + int count; + + for_each_sg(sglist, sgl, nelems, count) + alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl)); + + ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs); + vio_cmo_dealloc(viodev, alloc_size); +} + +static const struct dma_map_ops vio_dma_mapping_ops = { + .alloc = vio_dma_iommu_alloc_coherent, + .free = vio_dma_iommu_free_coherent, + .map_sg = vio_dma_iommu_map_sg, + .unmap_sg = vio_dma_iommu_unmap_sg, + .map_page = vio_dma_iommu_map_page, + .unmap_page = vio_dma_iommu_unmap_page, + .dma_supported = dma_iommu_dma_supported, + .get_required_mask = dma_iommu_get_required_mask, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, +}; + +/** + * vio_cmo_set_dev_desired - Set desired entitlement for a device + * + * @viodev: struct vio_dev for device to alter + * @desired: new desired entitlement level in bytes + * + * For use by devices to request a change to their entitlement at runtime or + * through sysfs. The desired entitlement level is changed and a balancing + * of system resources is scheduled to run in the future. + */ +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) +{ + unsigned long flags; + struct vio_cmo_dev_entry *dev_ent; + int found = 0; + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (desired < VIO_CMO_MIN_ENT) + desired = VIO_CMO_MIN_ENT; + + /* + * Changes will not be made for devices not in the device list. + * If it is not in the device list, then no driver is loaded + * for the device and it can not receive entitlement. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + found = 1; + break; + } + if (!found) { + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return; + } + + /* Increase/decrease in desired device entitlement */ + if (desired >= viodev->cmo.desired) { + /* Just bump the bus and device values prior to a balance*/ + vio_cmo.desired += desired - viodev->cmo.desired; + viodev->cmo.desired = desired; + } else { + /* Decrease bus and device values for desired entitlement */ + vio_cmo.desired -= viodev->cmo.desired - desired; + viodev->cmo.desired = desired; + /* + * If less entitlement is desired than current entitlement, move + * any reserve memory in the change region to the excess pool. + */ + if (viodev->cmo.entitled > desired) { + vio_cmo.reserve.size -= viodev->cmo.entitled - desired; + vio_cmo.excess.size += viodev->cmo.entitled - desired; + /* + * If entitlement moving from the reserve pool to the + * excess pool is currently unused, add to the excess + * free counter. + */ + if (viodev->cmo.allocated < viodev->cmo.entitled) + vio_cmo.excess.free += viodev->cmo.entitled - + max(viodev->cmo.allocated, desired); + viodev->cmo.entitled = desired; + } + } + schedule_delayed_work(&vio_cmo.balance_q, 0); + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +/** + * vio_cmo_bus_probe - Handle CMO specific bus probe activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Determine the devices IO memory entitlement needs, attempting + * to satisfy the system minimum entitlement at first and scheduling + * a balance operation to take care of the rest at a later time. + * + * Returns: 0 on success, -EINVAL when device doesn't support CMO, and + * -ENOMEM when entitlement is not available for device or + * device entry. + * + */ +static int vio_cmo_bus_probe(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + struct device *dev = &viodev->dev; + struct iommu_table *tbl; + struct vio_driver *viodrv = to_vio_driver(dev->driver); + unsigned long flags; + size_t size; + bool dma_capable = false; + + tbl = get_iommu_table_base(dev); + + /* A device requires entitlement if it has a DMA window property */ + switch (viodev->family) { + case VDEVICE: + if (of_get_property(viodev->dev.of_node, + "ibm,my-dma-window", NULL)) + dma_capable = true; + break; + case PFO: + dma_capable = false; + break; + default: + dev_warn(dev, "unknown device family: %d\n", viodev->family); + BUG(); + break; + } + + /* Configure entitlement for the device. */ + if (dma_capable) { + /* Check that the driver is CMO enabled and get desired DMA */ + if (!viodrv->get_desired_dma) { + dev_err(dev, "%s: device driver does not support CMO\n", + __func__); + return -EINVAL; + } + + viodev->cmo.desired = + IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl); + if (viodev->cmo.desired < VIO_CMO_MIN_ENT) + viodev->cmo.desired = VIO_CMO_MIN_ENT; + size = VIO_CMO_MIN_ENT; + + dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry), + GFP_KERNEL); + if (!dev_ent) + return -ENOMEM; + + dev_ent->viodev = viodev; + spin_lock_irqsave(&vio_cmo.lock, flags); + list_add(&dev_ent->list, &vio_cmo.device_list); + } else { + viodev->cmo.desired = 0; + size = 0; + spin_lock_irqsave(&vio_cmo.lock, flags); + } + + /* + * If the needs for vio_cmo.min have not changed since they + * were last set, the number of devices in the OF tree has + * been constant and the IO memory for this is already in + * the reserve pool. + */ + if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) * + VIO_CMO_MIN_ENT)) { + /* Updated desired entitlement if device requires it */ + if (size) + vio_cmo.desired += (viodev->cmo.desired - + VIO_CMO_MIN_ENT); + } else { + size_t tmp; + + tmp = vio_cmo.spare + vio_cmo.excess.free; + if (tmp < size) { + dev_err(dev, "%s: insufficient free " + "entitlement to add device. " + "Need %lu, have %lu\n", __func__, + size, (vio_cmo.spare + tmp)); + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return -ENOMEM; + } + + /* Use excess pool first to fulfill request */ + tmp = min(size, vio_cmo.excess.free); + vio_cmo.excess.free -= tmp; + vio_cmo.excess.size -= tmp; + vio_cmo.reserve.size += tmp; + + /* Use spare if excess pool was insufficient */ + vio_cmo.spare -= size - tmp; + + /* Update bus accounting */ + vio_cmo.min += size; + vio_cmo.desired += viodev->cmo.desired; + } + spin_unlock_irqrestore(&vio_cmo.lock, flags); + return 0; +} + +/** + * vio_cmo_bus_remove - Handle CMO specific bus removal activities + * + * @viodev - Pointer to struct vio_dev for device + * + * Remove the device from the cmo device list. The minimum entitlement + * will be reserved for the device as long as it is in the system. The + * rest of the entitlement the device had been allocated will be returned + * to the system. + */ +static void vio_cmo_bus_remove(struct vio_dev *viodev) +{ + struct vio_cmo_dev_entry *dev_ent; + unsigned long flags; + size_t tmp; + + spin_lock_irqsave(&vio_cmo.lock, flags); + if (viodev->cmo.allocated) { + dev_err(&viodev->dev, "%s: device had %lu bytes of IO " + "allocated after remove operation.\n", + __func__, viodev->cmo.allocated); + BUG(); + } + + /* + * Remove the device from the device list being maintained for + * CMO enabled devices. + */ + list_for_each_entry(dev_ent, &vio_cmo.device_list, list) + if (viodev == dev_ent->viodev) { + list_del(&dev_ent->list); + kfree(dev_ent); + break; + } + + /* + * Devices may not require any entitlement and they do not need + * to be processed. Otherwise, return the device's entitlement + * back to the pools. + */ + if (viodev->cmo.entitled) { + /* + * This device has not yet left the OF tree, it's + * minimum entitlement remains in vio_cmo.min and + * vio_cmo.desired + */ + vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT); + + /* + * Save min allocation for device in reserve as long + * as it exists in OF tree as determined by later + * balance operation + */ + viodev->cmo.entitled -= VIO_CMO_MIN_ENT; + + /* Replenish spare from freed reserve pool */ + if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) { + tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT - + vio_cmo.spare)); + vio_cmo.spare += tmp; + viodev->cmo.entitled -= tmp; + } + + /* Remaining reserve goes to excess pool */ + vio_cmo.excess.size += viodev->cmo.entitled; + vio_cmo.excess.free += viodev->cmo.entitled; + vio_cmo.reserve.size -= viodev->cmo.entitled; + + /* + * Until the device is removed it will keep a + * minimum entitlement; this will guarantee that + * a module unload/load will result in a success. + */ + viodev->cmo.entitled = VIO_CMO_MIN_ENT; + viodev->cmo.desired = VIO_CMO_MIN_ENT; + atomic_set(&viodev->cmo.allocs_failed, 0); + } + + spin_unlock_irqrestore(&vio_cmo.lock, flags); +} + +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) +{ + set_dma_ops(&viodev->dev, &vio_dma_mapping_ops); +} + +/** + * vio_cmo_bus_init - CMO entitlement initialization at bus init time + * + * Set up the reserve and excess entitlement pools based on available + * system entitlement and the number of devices in the OF tree that + * require entitlement in the reserve pool. + */ +static void vio_cmo_bus_init(void) +{ + struct hvcall_mpp_data mpp_data; + int err; + + memset(&vio_cmo, 0, sizeof(struct vio_cmo)); + spin_lock_init(&vio_cmo.lock); + INIT_LIST_HEAD(&vio_cmo.device_list); + INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance); + + /* Get current system entitlement */ + err = h_get_mpp(&mpp_data); + + /* + * On failure, continue with entitlement set to 0, will panic() + * later when spare is reserved. + */ + if (err != H_SUCCESS) { + printk(KERN_ERR "%s: unable to determine system IO "\ + "entitlement. (%d)\n", __func__, err); + vio_cmo.entitled = 0; + } else { + vio_cmo.entitled = mpp_data.entitled_mem; + } + + /* Set reservation and check against entitlement */ + vio_cmo.spare = VIO_CMO_MIN_ENT; + vio_cmo.reserve.size = vio_cmo.spare; + vio_cmo.reserve.size += (vio_cmo_num_OF_devs() * + VIO_CMO_MIN_ENT); + if (vio_cmo.reserve.size > vio_cmo.entitled) { + printk(KERN_ERR "%s: insufficient system entitlement\n", + __func__); + panic("%s: Insufficient system entitlement", __func__); + } + + /* Set the remaining accounting variables */ + vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size; + vio_cmo.excess.free = vio_cmo.excess.size; + vio_cmo.min = vio_cmo.reserve.size; + vio_cmo.desired = vio_cmo.reserve.size; +} + +/* sysfs device functions and data structures for CMO */ + +#define viodev_cmo_rd_attr(name) \ +static ssize_t cmo_##name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \ +} + +static ssize_t cmo_allocs_failed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *viodev = to_vio_dev(dev); + return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed)); +} + +static ssize_t cmo_allocs_failed_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + atomic_set(&viodev->cmo.allocs_failed, 0); + return count; +} + +static ssize_t cmo_desired_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct vio_dev *viodev = to_vio_dev(dev); + size_t new_desired; + int ret; + + ret = kstrtoul(buf, 10, &new_desired); + if (ret) + return ret; + + vio_cmo_set_dev_desired(viodev, new_desired); + return count; +} + +viodev_cmo_rd_attr(desired); +viodev_cmo_rd_attr(entitled); +viodev_cmo_rd_attr(allocated); + +static ssize_t name_show(struct device *, struct device_attribute *, char *); +static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf); + +static struct device_attribute dev_attr_name; +static struct device_attribute dev_attr_devspec; +static struct device_attribute dev_attr_modalias; + +static DEVICE_ATTR_RO(cmo_entitled); +static DEVICE_ATTR_RO(cmo_allocated); +static DEVICE_ATTR_RW(cmo_desired); +static DEVICE_ATTR_RW(cmo_allocs_failed); + +static struct attribute *vio_cmo_dev_attrs[] = { + &dev_attr_name.attr, + &dev_attr_devspec.attr, + &dev_attr_modalias.attr, + &dev_attr_cmo_entitled.attr, + &dev_attr_cmo_allocated.attr, + &dev_attr_cmo_desired.attr, + &dev_attr_cmo_allocs_failed.attr, + NULL, +}; +ATTRIBUTE_GROUPS(vio_cmo_dev); + +/* sysfs bus functions and data structures for CMO */ + +#define viobus_cmo_rd_attr(name) \ +static ssize_t cmo_bus_##name##_show(const struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name); \ +} \ +static struct bus_attribute bus_attr_cmo_bus_##name = \ + __ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL) + +#define viobus_cmo_pool_rd_attr(name, var) \ +static ssize_t \ +cmo_##name##_##var##_show(const struct bus_type *bt, char *buf) \ +{ \ + return sprintf(buf, "%lu\n", vio_cmo.name.var); \ +} \ +static BUS_ATTR_RO(cmo_##name##_##var) + +viobus_cmo_rd_attr(entitled); +viobus_cmo_rd_attr(spare); +viobus_cmo_rd_attr(min); +viobus_cmo_rd_attr(desired); +viobus_cmo_rd_attr(curr); +viobus_cmo_pool_rd_attr(reserve, size); +viobus_cmo_pool_rd_attr(excess, size); +viobus_cmo_pool_rd_attr(excess, free); + +static ssize_t cmo_high_show(const struct bus_type *bt, char *buf) +{ + return sprintf(buf, "%lu\n", vio_cmo.high); +} + +static ssize_t cmo_high_store(const struct bus_type *bt, const char *buf, + size_t count) +{ + unsigned long flags; + + spin_lock_irqsave(&vio_cmo.lock, flags); + vio_cmo.high = vio_cmo.curr; + spin_unlock_irqrestore(&vio_cmo.lock, flags); + + return count; +} +static BUS_ATTR_RW(cmo_high); + +static struct attribute *vio_bus_attrs[] = { + &bus_attr_cmo_bus_entitled.attr, + &bus_attr_cmo_bus_spare.attr, + &bus_attr_cmo_bus_min.attr, + &bus_attr_cmo_bus_desired.attr, + &bus_attr_cmo_bus_curr.attr, + &bus_attr_cmo_high.attr, + &bus_attr_cmo_reserve_size.attr, + &bus_attr_cmo_excess_size.attr, + &bus_attr_cmo_excess_free.attr, + NULL, +}; +ATTRIBUTE_GROUPS(vio_bus); + +static void __init vio_cmo_sysfs_init(void) +{ + vio_bus_type.dev_groups = vio_cmo_dev_groups; + vio_bus_type.bus_groups = vio_bus_groups; +} +#else /* CONFIG_PPC_SMLPAR */ +int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; } +void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {} +static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } +static void vio_cmo_bus_remove(struct vio_dev *viodev) {} +static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} +static void vio_cmo_bus_init(void) {} +static void __init vio_cmo_sysfs_init(void) { } +#endif /* CONFIG_PPC_SMLPAR */ +EXPORT_SYMBOL(vio_cmo_entitlement_update); +EXPORT_SYMBOL(vio_cmo_set_dev_desired); + + +/* + * Platform Facilities Option (PFO) support + */ + +/** + * vio_h_cop_sync - Perform a synchronous PFO co-processor operation + * + * @vdev - Pointer to a struct vio_dev for device + * @op - Pointer to a struct vio_pfo_op for the operation parameters + * + * Calls the hypervisor to synchronously perform the PFO operation + * described in @op. In the case of a busy response from the hypervisor, + * the operation will be re-submitted indefinitely unless a non-zero timeout + * is specified or an error occurs. The timeout places a limit on when to + * stop re-submitting a operation, the total time can be exceeded if an + * operation is in progress. + * + * If op->hcall_ret is not NULL, this will be set to the return from the + * last h_cop_op call or it will be 0 if an error not involving the h_call + * was encountered. + * + * Returns: + * 0 on success, + * -EINVAL if the h_call fails due to an invalid parameter, + * -E2BIG if the h_call can not be performed synchronously, + * -EBUSY if a timeout is specified and has elapsed, + * -EACCES if the memory area for data/status has been rescinded, or + * -EPERM if a hardware fault has been indicated + */ +int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op) +{ + struct device *dev = &vdev->dev; + unsigned long deadline = 0; + long hret = 0; + int ret = 0; + + if (op->timeout) + deadline = jiffies + msecs_to_jiffies(op->timeout); + + while (true) { + hret = plpar_hcall_norets(H_COP, op->flags, + vdev->resource_id, + op->in, op->inlen, op->out, + op->outlen, op->csbcpb); + + if (hret == H_SUCCESS || + (hret != H_NOT_ENOUGH_RESOURCES && + hret != H_BUSY && hret != H_RESOURCE) || + (op->timeout && time_after(deadline, jiffies))) + break; + + dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret); + } + + switch (hret) { + case H_SUCCESS: + ret = 0; + break; + case H_OP_MODE: + case H_TOO_BIG: + ret = -E2BIG; + break; + case H_RESCINDED: + ret = -EACCES; + break; + case H_HARDWARE: + ret = -EPERM; + break; + case H_NOT_ENOUGH_RESOURCES: + case H_RESOURCE: + case H_BUSY: + ret = -EBUSY; + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n", + __func__, ret, hret); + + op->hcall_err = hret; + return ret; +} +EXPORT_SYMBOL(vio_h_cop_sync); + +static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) +{ + const __be32 *dma_window; + struct iommu_table *tbl; + unsigned long offset, size; + + dma_window = of_get_property(dev->dev.of_node, + "ibm,my-dma-window", NULL); + if (!dma_window) + return NULL; + + tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); + if (tbl == NULL) + return NULL; + + kref_init(&tbl->it_kref); + + of_parse_dma_window(dev->dev.of_node, dma_window, + &tbl->it_index, &offset, &size); + + /* TCE table size - measured in tce entries */ + tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K; + tbl->it_size = size >> tbl->it_page_shift; + /* offset for VIO should always be 0 */ + tbl->it_offset = offset >> tbl->it_page_shift; + tbl->it_busno = 0; + tbl->it_type = TCE_VB; + tbl->it_blocksize = 16; + + if (firmware_has_feature(FW_FEATURE_LPAR)) + tbl->it_ops = &iommu_table_lpar_multi_ops; + else + tbl->it_ops = &iommu_table_pseries_ops; + + return iommu_init_table(tbl, -1, 0, 0); +} + +/** + * vio_match_device: - Tell if a VIO device has a matching + * VIO device id structure. + * @ids: array of VIO device id structures to search in + * @dev: the VIO device structure to match against + * + * Used by a driver to check whether a VIO device present in the + * system is in its list of supported devices. Returns the matching + * vio_device_id structure or NULL if there is no match. + */ +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *ids, const struct vio_dev *dev) +{ + while (ids->type[0] != '\0') { + if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && + of_device_is_compatible(dev->dev.of_node, + ids->compat)) + return ids; + ids++; + } + return NULL; +} + +/* + * Convert from struct device to struct vio_dev and pass to driver. + * dev->driver has already been set by generic code because vio_bus_match + * succeeded. + */ +static int vio_bus_probe(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + if (!viodrv->probe) + return error; + + id = vio_match_device(viodrv->id_table, viodev); + if (id) { + memset(&viodev->cmo, 0, sizeof(viodev->cmo)); + if (firmware_has_feature(FW_FEATURE_CMO)) { + error = vio_cmo_bus_probe(viodev); + if (error) + return error; + } + error = viodrv->probe(viodev, id); + if (error && firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); + } + + return error; +} + +/* convert from struct device to struct vio_dev and pass to driver. */ +static void vio_bus_remove(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv = to_vio_driver(dev->driver); + struct device *devptr; + + /* + * Hold a reference to the device after the remove function is called + * to allow for CMO accounting cleanup for the device. + */ + devptr = get_device(dev); + + if (viodrv->remove) + viodrv->remove(viodev); + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_remove(viodev); + + put_device(devptr); +} + +static void vio_bus_shutdown(struct device *dev) +{ + struct vio_dev *viodev = to_vio_dev(dev); + struct vio_driver *viodrv; + + if (dev->driver) { + viodrv = to_vio_driver(dev->driver); + if (viodrv->shutdown) + viodrv->shutdown(viodev); + else if (kexec_in_progress) + vio_bus_remove(dev); + } +} + +/** + * vio_register_driver: - Register a new vio driver + * @viodrv: The vio_driver structure to be registered. + */ +int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, + const char *mod_name) +{ + // vio_bus_type is only initialised for pseries + if (!machine_is(pseries)) + return -ENODEV; + + pr_debug("%s: driver %s registering\n", __func__, viodrv->name); + + /* fill in 'struct driver' fields */ + viodrv->driver.name = viodrv->name; + viodrv->driver.pm = viodrv->pm; + viodrv->driver.bus = &vio_bus_type; + viodrv->driver.owner = owner; + viodrv->driver.mod_name = mod_name; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(__vio_register_driver); + +/** + * vio_unregister_driver - Remove registration of vio driver. + * @viodrv: The vio_driver struct to be removed form registration + */ +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +/* vio_dev refcount hit 0 */ +static void vio_dev_release(struct device *dev) +{ + struct iommu_table *tbl = get_iommu_table_base(dev); + + if (tbl) + iommu_tce_table_put(tbl); + of_node_put(dev->of_node); + kfree(to_vio_dev(dev)); +} + +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev *vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + struct device_node *parent_node; + const __be32 *prop; + enum vio_dev_family family; + + /* + * Determine if this node is a under the /vdevice node or under the + * /ibm,platform-facilities node. This decides the device's family. + */ + parent_node = of_get_parent(of_node); + if (parent_node) { + if (of_node_is_type(parent_node, "ibm,platform-facilities")) + family = PFO; + else if (of_node_is_type(parent_node, "vdevice")) + family = VDEVICE; + else { + pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n", + __func__, + parent_node, + of_node); + of_node_put(parent_node); + return NULL; + } + of_node_put(parent_node); + } else { + pr_warn("%s: could not determine the parent of node %pOFn.\n", + __func__, of_node); + return NULL; + } + + if (family == PFO) { + if (of_property_read_bool(of_node, "interrupt-controller")) { + pr_debug("%s: Skipping the interrupt controller %pOFn.\n", + __func__, of_node); + return NULL; + } + } + + /* allocate a vio_dev for this node */ + viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (viodev == NULL) { + pr_warn("%s: allocation failure for VIO device.\n", __func__); + return NULL; + } + + /* we need the 'device_type' property, in order to match with drivers */ + viodev->family = family; + if (viodev->family == VDEVICE) { + unsigned int unit_address; + + viodev->type = of_node_get_device_type(of_node); + if (!viodev->type) { + pr_warn("%s: node %pOFn is missing the 'device_type' " + "property.\n", __func__, of_node); + goto out; + } + + prop = of_get_property(of_node, "reg", NULL); + if (prop == NULL) { + pr_warn("%s: node %pOFn missing 'reg'\n", + __func__, of_node); + goto out; + } + unit_address = of_read_number(prop, 1); + dev_set_name(&viodev->dev, "%x", unit_address); + viodev->irq = irq_of_parse_and_map(of_node, 0); + viodev->unit_address = unit_address; + } else { + /* PFO devices need their resource_id for submitting COP_OPs + * This is an optional field for devices, but is required when + * performing synchronous ops */ + prop = of_get_property(of_node, "ibm,resource-id", NULL); + if (prop != NULL) + viodev->resource_id = of_read_number(prop, 1); + + dev_set_name(&viodev->dev, "%pOFn", of_node); + viodev->type = dev_name(&viodev->dev); + viodev->irq = 0; + } + + viodev->name = of_node->name; + viodev->dev.of_node = of_node_get(of_node); + + set_dev_node(&viodev->dev, of_node_to_nid(of_node)); + + /* init generic 'struct device' fields: */ + viodev->dev.parent = &vio_bus_device.dev; + viodev->dev.bus = &vio_bus_type; + viodev->dev.release = vio_dev_release; + + if (of_property_present(viodev->dev.of_node, "ibm,my-dma-window")) { + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + set_dma_ops(&viodev->dev, &dma_iommu_ops); + + set_iommu_table_base(&viodev->dev, + vio_build_iommu_table(viodev)); + + /* needed to ensure proper operation of coherent allocations + * later, in case driver doesn't set it explicitly */ + viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64); + viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask; + } + + /* register with generic device framework */ + if (device_register(&viodev->dev)) { + printk(KERN_ERR "%s: failed to register device %s\n", + __func__, dev_name(&viodev->dev)); + put_device(&viodev->dev); + return NULL; + } + + return viodev; + +out: /* Use this exit point for any return prior to device_register */ + kfree(viodev); + + return NULL; +} +EXPORT_SYMBOL(vio_register_device_node); + +/* + * vio_bus_scan_for_devices - Scan OF and register each child device + * @root_name - OF node name for the root of the subtree to search. + * This must be non-NULL + * + * Starting from the root node provide, register the device node for + * each child beneath the root. + */ +static void __init vio_bus_scan_register_devices(char *root_name) +{ + struct device_node *node_root, *node_child; + + if (!root_name) + return; + + node_root = of_find_node_by_name(NULL, root_name); + if (node_root) { + + /* + * Create struct vio_devices for each virtual device in + * the device tree. Drivers will associate with them later. + */ + node_child = of_get_next_child(node_root, NULL); + while (node_child) { + vio_register_device_node(node_child); + node_child = of_get_next_child(node_root, node_child); + } + of_node_put(node_root); + } +} + +/** + * vio_bus_init: - Initialize the virtual IO bus + */ +static int __init vio_bus_init(void) +{ + int err; + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_sysfs_init(); + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "failed to register VIO bus\n"); + return err; + } + + /* + * The fake parent of all vio devices, just to give us + * a nice directory + */ + err = device_register(&vio_bus_device.dev); + if (err) { + printk(KERN_WARNING "%s: device_register returned %i\n", + __func__, err); + return err; + } + + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_bus_init(); + + return 0; +} +machine_postcore_initcall(pseries, vio_bus_init); + +static int __init vio_device_init(void) +{ + vio_bus_scan_register_devices("vdevice"); + vio_bus_scan_register_devices("ibm,platform-facilities"); + + return 0; +} +machine_device_initcall(pseries, vio_device_init); + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_vio_dev(dev)->name); +} +static DEVICE_ATTR_RO(name); + +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%pOF\n", of_node); +} +static DEVICE_ATTR_RO(devspec); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct device_node *dn; + const char *cp; + + dn = dev->of_node; + if (!dn) { + strcpy(buf, "\n"); + return strlen(buf); + } + cp = of_get_property(dn, "compatible", NULL); + if (!cp) { + strcpy(buf, "\n"); + return strlen(buf); + } + + return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *vio_dev_attrs[] = { + &dev_attr_name.attr, + &dev_attr_devspec.attr, + &dev_attr_modalias.attr, + NULL, +}; +ATTRIBUTE_GROUPS(vio_dev); + +void vio_unregister_device(struct vio_dev *viodev) +{ + device_unregister(&viodev->dev); + if (viodev->family == VDEVICE) + irq_dispose_mapping(viodev->irq); +} +EXPORT_SYMBOL(vio_unregister_device); + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *ids = vio_drv->id_table; + + return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); +} + +static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + const struct device_node *dn; + const char *cp; + + dn = dev->of_node; + if (!dn) + return -ENODEV; + cp = of_get_property(dn, "compatible", NULL); + if (!cp) + return -ENODEV; + + add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp); + return 0; +} + +struct bus_type vio_bus_type = { + .name = "vio", + .dev_groups = vio_dev_groups, + .uevent = vio_hotplug, + .match = vio_bus_match, + .probe = vio_bus_probe, + .remove = vio_bus_remove, + .shutdown = vio_bus_shutdown, +}; + +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). + * + * Calls prom.c's of_get_property() to return the value of the + * attribute specified by @which +*/ +const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length) +{ + return of_get_property(vdev->dev.of_node, which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + */ +static struct vio_dev *vio_find_name(const char *name) +{ + struct device *found; + + found = bus_find_device_by_name(&vio_bus_type, NULL, name); + if (!found) + return NULL; + + return to_vio_dev(found); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + * + * Takes a reference to the embedded struct device which needs to be dropped + * after use. + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + char kobj_name[20]; + struct device_node *vnode_parent; + + vnode_parent = of_get_parent(vnode); + if (!vnode_parent) + return NULL; + + /* construct the kobject name from the device node */ + if (of_node_is_type(vnode_parent, "vdevice")) { + const __be32 *prop; + + prop = of_get_property(vnode, "reg", NULL); + if (!prop) + goto out; + snprintf(kobj_name, sizeof(kobj_name), "%x", + (uint32_t)of_read_number(prop, 1)); + } else if (of_node_is_type(vnode_parent, "ibm,platform-facilities")) + snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode); + else + goto out; + + of_node_put(vnode_parent); + return vio_find_name(kobj_name); +out: + of_node_put(vnode_parent); + return NULL; +} +EXPORT_SYMBOL(vio_find_node); + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_SUCCESS) + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_SUCCESS) + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); + +static int __init vio_init(void) +{ + dma_debug_add_bus(&vio_bus_type); + return 0; +} +machine_fs_initcall(pseries, vio_init); diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c new file mode 100644 index 0000000000..3f85ece3c8 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vphn.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +/* + * The associativity domain numbers are returned from the hypervisor as a + * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the + * special value of "all ones" (aka. 0xffff) and its size may not exceed 48 + * bytes. + * + * --- 16-bit fields --> + * _________________________ + * | 0 | 1 | 2 | 3 | be_packed[0] + * ------+-----+-----+------ + * _________________________ + * | 4 | 5 | 6 | 7 | be_packed[1] + * ------------------------- + * ... + * _________________________ + * | 20 | 21 | 22 | 23 | be_packed[5] + * ------------------------- + * + * Convert to the sequence they would appear in the ibm,associativity property. + */ +static int vphn_unpack_associativity(const long *packed, __be32 *unpacked) +{ + __be64 be_packed[VPHN_REGISTER_COUNT]; + int i, nr_assoc_doms = 0; + const __be16 *field = (const __be16 *) be_packed; + u16 last = 0; + bool is_32bit = false; + +#define VPHN_FIELD_UNUSED (0xffff) +#define VPHN_FIELD_MSB (0x8000) +#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB) + + /* Let's fix the values returned by plpar_hcall9() */ + for (i = 0; i < VPHN_REGISTER_COUNT; i++) + be_packed[i] = cpu_to_be64(packed[i]); + + for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) { + u16 new = be16_to_cpup(field++); + + if (is_32bit) { + /* + * Let's concatenate the 16 bits of this field to the + * 15 lower bits of the previous field + */ + unpacked[++nr_assoc_doms] = + cpu_to_be32(last << 16 | new); + is_32bit = false; + } else if (new == VPHN_FIELD_UNUSED) + /* This is the list terminator */ + break; + else if (new & VPHN_FIELD_MSB) { + /* Data is in the lower 15 bits of this field */ + unpacked[++nr_assoc_doms] = + cpu_to_be32(new & VPHN_FIELD_MASK); + } else { + /* + * Data is in the lower 15 bits of this field + * concatenated with the next 16 bit field + */ + last = new; + is_32bit = true; + } + } + + /* The first cell contains the length of the property */ + unpacked[0] = cpu_to_be32(nr_assoc_doms); + + return nr_assoc_doms; +} + +/* NOTE: This file is included by a selftest and built in userspace. */ +#ifdef __KERNEL__ +#include + +long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) +{ + long rc; + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu); + if (rc == H_SUCCESS) + vphn_unpack_associativity(retbuf, associativity); + + return rc; +} +#endif -- cgit v1.2.3